[llvm-branch-commits] [llvm] [libsycl] Add parallel_for feature (PR #189068)
Kseniya Tikhomirova via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Apr 23 04:05:29 PDT 2026
https://github.com/KseniyaTikhomirova updated https://github.com/llvm/llvm-project/pull/189068
>From 03a1c675484bf83746ac9cb9b9580e2f3bed238f Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Wed, 25 Mar 2026 05:18:49 -0700
Subject: [PATCH 1/7] [libsycl] add single_task
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
addition to single task
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
libsycl/docs/index.rst | 4 +
.../sycl/__impl/detail/arg_wrapper.hpp | 135 ++++++++++++++++++
.../sycl/__impl/detail/unified_range_view.hpp | 52 +++++++
libsycl/include/sycl/__impl/queue.hpp | 96 +++++++++++++
libsycl/src/detail/queue_impl.cpp | 112 +++++++++++++++
libsycl/src/detail/queue_impl.hpp | 37 +++++
libsycl/src/queue.cpp | 19 +++
libsycl/test/basic/get_backend.cpp | 54 +++++++
libsycl/test/basic/submit_fn_ptr.cpp | 18 +++
9 files changed, 527 insertions(+)
create mode 100644 libsycl/include/sycl/__impl/detail/arg_wrapper.hpp
create mode 100644 libsycl/include/sycl/__impl/detail/unified_range_view.hpp
create mode 100644 libsycl/test/basic/get_backend.cpp
create mode 100644 libsycl/test/basic/submit_fn_ptr.cpp
diff --git a/libsycl/docs/index.rst b/libsycl/docs/index.rst
index 9aa36b4a54c57..5961eeeedcedb 100644
--- a/libsycl/docs/index.rst
+++ b/libsycl/docs/index.rst
@@ -113,6 +113,10 @@ TODO for added SYCL classes
* to implement submit & copy with accessors (low priority)
* get_info & properties
* ctors that accepts context (blocked by lack of liboffload support)
+ * nd_range kernel submissions
+ * cross-context events wait (host tasks are needed)
+ * implement check if lambda arguments are device copyable (requires clang support of corresponding builtins)
+ * kernel instantiating on host (debugging purposes)
* ``property_list``: to fully implement and integrate with existing SYCL runtime classes supporting it
* usm allocations:
diff --git a/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp b/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp
new file mode 100644
index 0000000000000..96f60a3121787
--- /dev/null
+++ b/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp
@@ -0,0 +1,135 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains helper functions used to wrap kernel arguments to
+/// typeless collection.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP
+#define _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP
+
+#include <sycl/__impl/detail/config.hpp>
+#include <sycl/__impl/exception.hpp>
+
+#include <cassert>
+#include <memory>
+
+_LIBSYCL_BEGIN_NAMESPACE_SYCL
+
+namespace detail {
+
+/// Base class is needed for unification, we pass arguments through ABI
+/// boundary.
+class ArgWrapperBase {
+public:
+ ArgWrapperBase(const ArgWrapperBase &) = delete;
+ ArgWrapperBase &operator=(const ArgWrapperBase &) = delete;
+ virtual ~ArgWrapperBase() = default;
+
+ virtual void deepCopy() = 0;
+ virtual size_t getSize() const = 0;
+ virtual const void *getPtr() const = 0;
+
+protected:
+ ArgWrapperBase() = default;
+};
+
+/// Helps to manage arguments in a typeless way.
+template <typename Type> class ArgWrapper : public ArgWrapperBase {
+public:
+ ArgWrapper(Type &Arg) { Ptr = &Arg; }
+ ArgWrapper(const ArgWrapper &) = delete;
+ ArgWrapper &operator=(const ArgWrapper &) = delete;
+
+ /// \return size of argument in bytes.
+ size_t getSize() const override { return sizeof(Type); }
+
+ /// Returns raw pointer to the corresponding argument.
+ /// No copy is done by this method. It works with pointer to the memory whose
+ /// existence must be guaranteed by class user or with copy that must be
+ /// explicitly requested by class user via deepCopy method.
+ /// \return pointer to the argument.
+ const void *getPtr() const override {
+ assert((!DeepCopy || (DeepCopy.get()) == Ptr) &&
+ "Incorrect state of copied argument");
+ return Ptr;
+ }
+
+ /// Copies agrument to RT owned storage.
+ void deepCopy() override {
+ if (DeepCopy)
+ return;
+
+ DeepCopy.reset(new Type(*Ptr));
+ Ptr = DeepCopy.get();
+ }
+
+private:
+ Type *Ptr;
+ std::unique_ptr<Type> DeepCopy;
+};
+
+/// Collection of arguments. Provides functionality to accumulate all arguments
+/// data to pass through ABI boundary.
+class ArgCollection {
+public:
+ /// Adds argument to the collection. Don't own the memory. Argument lifetime
+ /// must be guaranteed by class user. If extended lifetime is needed (copy),
+ /// deepCopy must be called.
+ template <typename Type> void addArg(Type &Arg) {
+ MArgs.emplace_back(new ArgWrapper(Arg));
+ }
+
+ /// \return array of argument pointers.
+ const void **getArgPtrArray() {
+ if (MPtrs.size() != MArgs.size()) {
+ MPtrs.clear();
+ MPtrs.reserve(MArgs.size());
+ auto it = MArgs.cbegin();
+ while (it != MArgs.cend()) {
+ MPtrs.push_back((*it++)->getPtr());
+ }
+ }
+ return MPtrs.data();
+ }
+
+ /// \return array of argument sizes.
+ int64_t *getSizesArray() {
+ if (MSizes.size() != MArgs.size()) {
+ MSizes.clear();
+ MSizes.reserve(MArgs.size());
+ auto it = MArgs.cbegin();
+ while (it != MArgs.cend()) {
+ MSizes.push_back(static_cast<int64_t>((*it++)->getSize()));
+ }
+ }
+ return MSizes.data();
+ }
+
+ /// \return count of arguments in collection.
+ size_t getArgCount() { return MArgs.size(); }
+
+ /// Extends arguments lifetime by doing copy of all arguments.
+ void deepCopy() {
+ for (auto &Arg : MArgs)
+ Arg->deepCopy();
+ }
+
+private:
+ std::vector<std::unique_ptr<ArgWrapperBase>> MArgs;
+ std::vector<int64_t> MSizes;
+ std::vector<const void *> MPtrs;
+};
+
+} // namespace detail
+
+_LIBSYCL_END_NAMESPACE_SYCL
+
+#endif // _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP
diff --git a/libsycl/include/sycl/__impl/detail/unified_range_view.hpp b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp
new file mode 100644
index 0000000000000..afa613fc8627b
--- /dev/null
+++ b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp
@@ -0,0 +1,52 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains helper function class to unify ABI for different kernel
+/// ranges.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBSYCL___IMPL_DETAIL_UNIFIED_RANGE_VIEW_HPP
+#define _LIBSYCL___IMPL_DETAIL_UNIFIED_RANGE_VIEW_HPP
+
+#include <sycl/__impl/detail/config.hpp>
+
+_LIBSYCL_BEGIN_NAMESPACE_SYCL
+
+namespace detail {
+
+/// The structure to keep dimension and references to ranges unified for
+/// all dimensions.
+class UnifiedRangeView {
+
+public:
+ /// Default contructed view matches single task execution range.
+ UnifiedRangeView() = default;
+ UnifiedRangeView(const UnifiedRangeView &Desc) = default;
+ UnifiedRangeView(UnifiedRangeView &&Desc) = default;
+ UnifiedRangeView &operator=(const UnifiedRangeView &Desc) = default;
+ UnifiedRangeView &operator=(UnifiedRangeView &&Desc) = default;
+
+ // TODO: ctors with sycl::range and nd::range will be added later.
+
+ UnifiedRangeView(const size_t *GlobalSize, const size_t *LocalSize,
+ const size_t *Offset, size_t Dims)
+ : MGlobalSize(GlobalSize), MLocalSize(LocalSize), MOffset(Offset),
+ MDims(Dims) {}
+
+ const size_t *MGlobalSize = nullptr;
+ const size_t *MLocalSize = nullptr;
+ const size_t *MOffset = nullptr;
+ size_t MDims = 1;
+};
+} // namespace detail
+
+_LIBSYCL_END_NAMESPACE_SYCL
+
+#endif // _LIBSYCL___IMPL_DETAIL_UNIFIED_RANGE_VIEW_HPP
diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp
index 587f56a8eb245..d1ac320433c38 100644
--- a/libsycl/include/sycl/__impl/queue.hpp
+++ b/libsycl/include/sycl/__impl/queue.hpp
@@ -20,9 +20,11 @@
#include <sycl/__impl/event.hpp>
#include <sycl/__impl/property_list.hpp>
+#include <sycl/__impl/detail/arg_wrapper.hpp>
#include <sycl/__impl/detail/config.hpp>
#include <sycl/__impl/detail/default_async_handler.hpp>
#include <sycl/__impl/detail/obj_utils.hpp>
+#include <sycl/__impl/detail/unified_range_view.hpp>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
@@ -31,6 +33,27 @@ class context;
namespace detail {
class QueueImpl;
+template <typename, typename T> struct CheckFunctionSignature {
+ static_assert(std::integral_constant<T, false>::value,
+ "Second template parameter is required to be of function type");
+};
+
+template <typename F, typename RetT, typename... Args>
+struct CheckFunctionSignature<F, RetT(Args...)> {
+private:
+ template <typename T>
+ static constexpr auto check(T *) -> typename std::is_same<
+ decltype(std::declval<T>().operator()(std::declval<Args>()...)),
+ RetT>::type;
+
+ template <typename> static constexpr std::false_type check(...);
+
+ using type = decltype(check<F>(0));
+
+public:
+ static constexpr bool value = type::value;
+};
+
} // namespace detail
// SYCL 2020 4.6.5. Queue class.
@@ -138,12 +161,85 @@ class _LIBSYCL_EXPORT queue {
template <typename Param>
typename Param::return_type get_backend_info() const;
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type.
+ ///
+ /// \param kernelFunc is the kernel functor or lambda.
+ /// \return an event that represents the status of the submitted kernel.
+ template <typename KernelName, typename KernelType>
+ event single_task(const KernelType &kernelFunc) {
+ return single_task<KernelName, KernelType>({}, kernelFunc);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type.
+ ///
+ /// \param depEvent is an event that specifies the kernel dependency.
+ /// \param kernelFunc is the kernel functor or lambda.
+ /// \return an event that represents the status of the submitted kernel.
+ template <typename KernelName, typename KernelType>
+ event single_task(event depEvent, const KernelType &kernelFunc) {
+ return single_task<KernelName, KernelType>({depEvent}, kernelFunc);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type.
+ ///
+ /// \param depEvents is a collection of events which specify the kernel
+ /// dependencies.
+ /// \param kernelFunc is the kernel functor or lambda.
+ /// \return an event that represents the status of the submitted kernel.
+ template <typename KernelName, typename KernelType>
+ event single_task(const std::vector<event> &depEvents,
+ const KernelType &kernelFunc) {
+ static_assert(
+ (detail::CheckFunctionSignature<std::remove_reference_t<KernelType>,
+ void()>::value),
+ "sycl::queue::single_task() requires a kernel instead of command "
+ "group. ");
+
+ setKernelParameters(depEvents);
+ submitSingleTask<KernelName, KernelType>(kernelFunc);
+ return getLastEvent();
+ }
+
/// Blocks the calling thread until all commands previously submitted to this
/// queue have completed. Synchronous errors are reported through SYCL
/// exceptions.
void wait();
private:
+ // Name of this function is defined by compiler. It generates call to this
+ // function in the host implementation of KernelFunc in submitSingleTask.
+ template <typename, typename... Args>
+ void sycl_kernel_launch(const char *KernelName, Args &&...args) {
+ static_assert((sizeof...(args) == 1) &&
+ "Only 2 arguments are expected in sycl_kernel_launch.");
+ detail::ArgCollection TypelessArgs;
+ (TypelessArgs.addArg(args), ...);
+
+ submitKernelImpl(KernelName, TypelessArgs);
+ }
+
+#ifdef SYCL_LANGUAGE_VERSION
+# define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) \
+ [[clang::sycl_kernel_entry_point(KernelName)]]
+#else
+# define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
+#endif // SYCL_LANGUAGE_VERSION
+
+ template <typename KernelName, typename KernelType>
+ _LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
+ void submitSingleTask(const KernelType KernelFunc) {
+ KernelFunc();
+ }
+
+ event getLastEvent();
+ void submitKernelImpl(const char *KernelName,
+ detail::ArgCollection &TypelessArgs);
+ void setKernelParameters(const std::vector<event> &Events,
+ const detail::UnifiedRangeView &Range = {});
+
queue(const std::shared_ptr<detail::QueueImpl> &Impl) : impl(Impl) {}
std::shared_ptr<detail::QueueImpl> impl;
diff --git a/libsycl/src/detail/queue_impl.cpp b/libsycl/src/detail/queue_impl.cpp
index 74ccc48877c09..243f38612e74c 100644
--- a/libsycl/src/detail/queue_impl.cpp
+++ b/libsycl/src/detail/queue_impl.cpp
@@ -16,6 +16,32 @@ _LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
+static void setKernelLaunchArgs(const detail::UnifiedRangeView &Range,
+ ol_kernel_launch_size_args_t &ArgsToSet) {
+ size_t GlobalSize[3] = {1, 1, 1};
+ if (Range.MGlobalSize) {
+ for (uint32_t I = 0; I < Range.MDims; I++) {
+ GlobalSize[I] = Range.MGlobalSize[I];
+ }
+ }
+
+ size_t GroupSize[3] = {1, 1, 1};
+ if (Range.MLocalSize) {
+ for (uint32_t I = 0; I < Range.MDims; I++) {
+ GroupSize[I] = Range.MLocalSize[I];
+ }
+ }
+
+ ArgsToSet.Dimensions = Range.MDims;
+ ArgsToSet.NumGroups.x = GlobalSize[0] / GroupSize[0];
+ ArgsToSet.NumGroups.y = GlobalSize[1] / GroupSize[1];
+ ArgsToSet.NumGroups.z = GlobalSize[2] / GroupSize[2];
+ ArgsToSet.GroupSize.x = GroupSize[0];
+ ArgsToSet.GroupSize.y = GroupSize[1];
+ ArgsToSet.GroupSize.z = GroupSize[2];
+ ArgsToSet.DynSharedMemory = 0;
+}
+
QueueImpl::QueueImpl(DeviceImpl &deviceImpl, const async_handler &asyncHandler,
const property_list &propList, PrivateTag)
: MIsInorder(false), MAsyncHandler(asyncHandler), MPropList(propList),
@@ -34,5 +60,91 @@ backend QueueImpl::getBackend() const noexcept { return MDevice.getBackend(); }
void QueueImpl::wait() { callAndThrow(olSyncQueue, MOffloadQueue); }
+static bool checkEventsPlatformMatch(std::vector<EventImplPtr> &Events,
+ const PlatformImpl &QueuePlatform) {
+ // liboffload limitation to olWaitEvents. We can't do any extra handling for
+ // cross context/platform events without host task support now.
+ // "The input events can be from any queue on any device provided by the
+ // same platform as `Queue`."
+ return std::all_of(Events.cbegin(), Events.cend(),
+ [&QueuePlatform](const EventImplPtr &Event) {
+ return &Event->getPlatformImpl() == &QueuePlatform;
+ });
+}
+
+void QueueImpl::setKernelParameters(std::vector<EventImplPtr> &&Events,
+ const detail::UnifiedRangeView &Range) {
+ if (!checkEventsPlatformMatch(Events, MDevice.getPlatformImpl()))
+ throw sycl::exception(
+ sycl::make_error_code(sycl::errc::feature_not_supported),
+ "libsycl doesn't support cross-context/platform event dependencies "
+ "now.");
+
+ // TODO: this convertion and storing only offload events is possible only
+ // while we don't have host tasks (and features based on host tasks, like
+ // streams). With them - it is very likely we should copy EventImplPtr
+ // (shared_ptr) and keep it here. Although it may differ if host tasks will be
+ // implemented on offload level (no data now).
+ assert(MCurrentSubmitInfo.DepEvents.empty() &&
+ "Kernel submission must clean up dependencies.");
+ MCurrentSubmitInfo.DepEvents.reserve(Events.size());
+ for (auto &Event : Events) {
+ assert(Event && "Event impl object can't be nullptr");
+ MCurrentSubmitInfo.DepEvents.push_back(Event->getHandle());
+ }
+ setKernelLaunchArgs(Range, MCurrentSubmitInfo.Range);
+}
+
+void QueueImpl::submitKernelImpl(const char *KernelName,
+ detail::ArgCollection &TypelessArgs) {
+ ol_symbol_handle_t Kernel =
+ detail::ProgramManager::getInstance().getOrCreateKernel(KernelName,
+ MDevice);
+ assert(Kernel);
+
+ ol_event_handle_t NewEvent{};
+ if (!MCurrentSubmitInfo.DepEvents.empty()) {
+ callAndThrow(olWaitEvents, MOffloadQueue,
+ MCurrentSubmitInfo.DepEvents.data(),
+ MCurrentSubmitInfo.DepEvents.size());
+ }
+
+ const void *Arguments = nullptr;
+ int64_t ArgumentsSize = 0;
+ if (TypelessArgs.getArgCount()) {
+ // without decomposition and free functions extension we always expect 1
+ // argument to the kernel - lambda capture.
+ assert(TypelessArgs.getArgCount() == 1 &&
+ "No arg decomposition or extensions are supported now.");
+ // TODO: liboffload doesn't support more than 1 argument without copy now.
+ // It doesn't expect array of arguments, it requires a contiguous memory
+ // with args. While we have only 1 argument we don't need extra handling
+ // here, we just pass the first argument directly.
+ Arguments = TypelessArgs.getArgPtrArray()[0];
+ ArgumentsSize = TypelessArgs.getSizesArray()[0];
+ }
+
+ // ol_kernel_launch_prop_t Props[2];
+ // Props[0].type = OL_KERNEL_LAUNCH_PROP_TYPE_SIZE;
+ // Props[0].data = &ArgumentsSize;
+ // Props[1] = OL_KERNEL_LAUNCH_PROP_END;
+ auto Result =
+ olLaunchKernel(MOffloadQueue, MDevice.getHandle(), Kernel, Arguments,
+ ArgumentsSize, &MCurrentSubmitInfo.Range /*, Props*/);
+ // Clean up current kernel submit data to prepare structures for next
+ // submission.
+ MCurrentSubmitInfo.DepEvents.clear();
+ MCurrentSubmitInfo.Range = {};
+ if (isFailed(Result))
+ throw sycl::exception(sycl::make_error_code(sycl::errc::runtime),
+ std::string("Kernel submission (") + KernelName +
+ ") failed with " + formatCodeString(Result));
+
+ callAndThrow(olCreateEvent, MOffloadQueue, &NewEvent);
+
+ MCurrentSubmitInfo.LastEvent =
+ EventImpl::createEventWithHandle(NewEvent, MDevice.getPlatformImpl());
+}
+
} // namespace detail
_LIBSYCL_END_NAMESPACE_SYCL
diff --git a/libsycl/src/detail/queue_impl.hpp b/libsycl/src/detail/queue_impl.hpp
index cdb7595e852ec..6edb40471826a 100644
--- a/libsycl/src/detail/queue_impl.hpp
+++ b/libsycl/src/detail/queue_impl.hpp
@@ -15,6 +15,7 @@
#include <OffloadAPI.h>
#include <memory>
+#include <mutex>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
@@ -62,16 +63,52 @@ class QueueImpl : public std::enable_shared_from_this<QueueImpl> {
/// \return true if and only if the queue is in order.
bool isInOrder() const { return MIsInorder; }
+ /// Enqueues kernel to liboffload.
+ /// Kernel parameters like dependencies and range must be passed in advance by
+ /// calling setKernelParameters.
+ /// \param KernelName a name of kernel to be enqueued.
+ /// \param TypelessArgs data about kernel arguments to be used for enqueue.
+ void submitKernelImpl(const char *KernelName,
+ detail::ArgCollection &TypelessArgs);
+
+ /// \return an event impl object that corresponds to the last kernel
+ /// submission in the calling thread.
+ EventImplPtr getLastEvent() {
+ assert(MCurrentSubmitInfo.LastEvent &&
+ "getLastEvent must be called after enqueue");
+ return MCurrentSubmitInfo.LastEvent;
+ }
+
+ /// Sets kernel parameters to be used in the next submitKernelImpl call.
+ /// Must be called prior to submitKernelImpl call.
+ /// \param Events a collection of events that kernal depends on.
+ /// \param Range a unified range view of execution range.
+ void setKernelParameters(std::vector<EventImplPtr> &&Events,
+ const detail::UnifiedRangeView &Range);
+
/// Waits for completion of all kernels submitted to this queue.
void wait();
private:
+ // Queue features.
ol_queue_handle_t MOffloadQueue = {};
const bool MIsInorder;
const async_handler MAsyncHandler;
const property_list MPropList;
DeviceImpl &MDevice;
ContextImpl &MContext;
+
+ // Submit data.
+ struct KernelSubmitInfo {
+ EventImplPtr LastEvent;
+ ol_kernel_launch_size_args_t Range;
+ // TODO: consider storing EventImplPtr here, it will work with plain handle
+ // only because submission is done within queue::submit call. Otherwise we
+ // need to ensure that event handle is still alive by keeping our own copy
+ // of EventImpl.
+ std::vector<ol_event_handle_t> DepEvents;
+ };
+ inline static thread_local KernelSubmitInfo MCurrentSubmitInfo = {};
};
} // namespace detail
diff --git a/libsycl/src/queue.cpp b/libsycl/src/queue.cpp
index 9fe020eabf2cc..f9d867e9567d7 100644
--- a/libsycl/src/queue.cpp
+++ b/libsycl/src/queue.cpp
@@ -33,6 +33,25 @@ device queue::get_device() const {
bool queue::is_in_order() const { return impl->isInOrder(); }
+event queue::getLastEvent() {
+ return detail::createSyclObjFromImpl<event>(impl->getLastEvent());
+}
+
+void queue::setKernelParameters(const std::vector<event> &Events,
+ const detail::UnifiedRangeView &Range) {
+ std::vector<detail::EventImplPtr> DepEventImplRefs;
+ DepEventImplRefs.reserve(Events.size());
+ for (const auto &Event : Events) {
+ DepEventImplRefs.push_back(detail::getSyclObjImpl(Event));
+ }
+ return impl->setKernelParameters(std::move(DepEventImplRefs), Range);
+}
+
+void queue::submitKernelImpl(const char *KernelName,
+ detail::ArgCollection &TypelessArgs) {
+ impl->submitKernelImpl(KernelName, TypelessArgs);
+}
+
void queue::wait() { return impl->wait(); }
_LIBSYCL_END_NAMESPACE_SYCL
diff --git a/libsycl/test/basic/get_backend.cpp b/libsycl/test/basic/get_backend.cpp
new file mode 100644
index 0000000000000..064149a0c67e8
--- /dev/null
+++ b/libsycl/test/basic/get_backend.cpp
@@ -0,0 +1,54 @@
+// REQUIRES: any-device
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %t.out
+
+#include <iostream>
+
+#include <sycl/sycl.hpp>
+
+using namespace sycl;
+
+class Kernel1;
+
+bool check(backend be) {
+ switch (be) {
+ case backend::opencl:
+ case backend::level_zero:
+ case backend::cuda:
+ case backend::hip:
+ return true;
+ default:
+ return false;
+ }
+ return false;
+}
+
+inline void return_fail() {
+ std::cout << "Failed" << std::endl;
+ exit(1);
+}
+
+int main() {
+ for (const auto &plt : platform::get_platforms()) {
+ if (check(plt.get_backend()) == false) {
+ return_fail();
+ }
+
+ auto device = device::get_devices()[0];
+ if (device.get_backend() != plt.get_backend()) {
+ return_fail();
+ }
+
+ queue q(device);
+ if (q.get_backend() != plt.get_backend()) {
+ return_fail();
+ }
+
+ event e = q.single_task<Kernel1>([]() {});
+ if (e.get_backend() != plt.get_backend()) {
+ return_fail();
+ }
+ }
+ std::cout << "Passed" << std::endl;
+ return 0;
+}
diff --git a/libsycl/test/basic/submit_fn_ptr.cpp b/libsycl/test/basic/submit_fn_ptr.cpp
new file mode 100644
index 0000000000000..2a5ce832d4db2
--- /dev/null
+++ b/libsycl/test/basic/submit_fn_ptr.cpp
@@ -0,0 +1,18 @@
+// REQUIRES: any-device
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %t.out
+
+#include <sycl/sycl.hpp>
+
+class Test;
+
+int main() {
+ sycl::queue q;
+ int *p = sycl::malloc_shared<int>(1, q);
+ *p = 0;
+ q.single_task<Test>([=]() { *p = 42; });
+ q.wait();
+ assert(*p == 42);
+ sycl::free(p, q);
+ return 0;
+}
>From 104ccef02b0d36581c7a60bcf6d7459284e8db64 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Thu, 26 Mar 2026 07:05:33 -0700
Subject: [PATCH 2/7] draft
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
add tests for parallel_for
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
remove operators from index space classes
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
libsycl/docs/index.rst | 1 +
.../sycl/__impl/detail/kernel_arg_helpers.hpp | 187 ++++++++
.../sycl/__impl/detail/unified_range_view.hpp | 6 +-
.../sycl/__impl/index_space_classes.hpp | 413 ++++++++++++++++++
libsycl/include/sycl/__impl/queue.hpp | 223 ++++++++--
libsycl/include/sycl/__spirv/spirv_vars.hpp | 75 ++++
.../test/basic/queue_parallel_for_generic.cpp | 47 ++
libsycl/test/basic/wrapped_usm_pointers.cpp | 111 +++++
8 files changed, 1031 insertions(+), 32 deletions(-)
create mode 100644 libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
create mode 100644 libsycl/include/sycl/__impl/index_space_classes.hpp
create mode 100644 libsycl/include/sycl/__spirv/spirv_vars.hpp
create mode 100644 libsycl/test/basic/queue_parallel_for_generic.cpp
create mode 100644 libsycl/test/basic/wrapped_usm_pointers.cpp
diff --git a/libsycl/docs/index.rst b/libsycl/docs/index.rst
index 5961eeeedcedb..585d05a78987d 100644
--- a/libsycl/docs/index.rst
+++ b/libsycl/docs/index.rst
@@ -126,6 +126,7 @@ TODO for added SYCL classes
* handle sub devices once they are implemented (blocked by liboffload support)
* ``event``: get_wait_list, get_info, get_profiling_info, wait_and_throw & default ctor are not implemented
+* ``range``, ``id`` - to add operators
* general opens:
* define a way to report errors from object dtors.
\ No newline at end of file
diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
new file mode 100644
index 0000000000000..d4a0ea9f63ff2
--- /dev/null
+++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
@@ -0,0 +1,187 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// to add
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
+#define _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
+
+#include <sycl/__impl/index_space_classes.hpp>
+
+#include <sycl/__impl/detail/config.hpp>
+
+#ifdef __SYCL_DEVICE_ONLY__
+# include <sycl/__spirv/spirv_vars.hpp>
+#endif
+
+_LIBSYCL_BEGIN_NAMESPACE_SYCL
+
+namespace detail {
+
+/// \name Helpers for the unnamed lambda extension.
+/// @{
+/// This class is the default kernel name template parameter type for kernel
+/// invocation APIs such as single_task.
+class AutoName {};
+
+/// Helper struct to get a kernel name type based on given Name and Type
+/// types: if Name is undefined (is a AutoName) then Type becomes
+/// the Name.
+template <typename Name, typename Type> struct get_kernel_name_t {
+ using name = Name;
+};
+
+/// Specialization for the case when Name is undefined.
+/// This is only legal with our compiler with the unnamed lambda extension or if
+/// the kernel is a functor object.
+template <typename Type> struct get_kernel_name_t<detail::AutoName, Type> {
+ using name = Type;
+};
+/// @}
+
+/// \name Helpers to verify kernel lambda type.
+/// \brief Checks that the function is callable with operator().
+/// @{
+template <typename, typename T> struct CheckFunctionSignature {
+ static_assert(std::integral_constant<T, false>::value,
+ "Second template parameter is required to be of function type");
+};
+
+template <typename F, typename RetT, typename... Args>
+struct CheckFunctionSignature<F, RetT(Args...)> {
+private:
+ template <typename T>
+ static constexpr auto check(T *) -> typename std::is_same<
+ decltype(std::declval<T>().operator()(std::declval<Args>()...)),
+ RetT>::type;
+
+ template <typename> static constexpr std::false_type check(...);
+
+ using type = decltype(check<F>(0));
+
+public:
+ static constexpr bool value = type::value;
+};
+/// @}
+
+/// \name Helpers to extract types of lambda arguments.
+/// @{
+template <typename RetType, typename Func, typename Arg>
+static Arg member_ptr_helper(RetType (Func::*)(Arg) const);
+
+// Non-const version of the above template to match functors whose
+// 'operator()' is declared w/o the 'const' qualifier.
+template <typename RetType, typename Func, typename Arg>
+static Arg member_ptr_helper(RetType (Func::*)(Arg));
+
+template <typename F, typename SuggestedArgType>
+decltype(member_ptr_helper(&F::operator())) argument_helper(int);
+
+template <typename F, typename SuggestedArgType>
+SuggestedArgType argument_helper(...);
+
+template <typename F, typename SuggestedArgType>
+using lambda_arg_type = decltype(argument_helper<F, SuggestedArgType>(0));
+
+#if __has_builtin(__type_pack_element)
+template <int N, typename... Ts>
+using nth_type_t = __type_pack_element<N, Ts...>;
+#else
+template <int N, typename T, typename... Ts> struct nth_type {
+ using type = typename nth_type<N - 1, Ts...>::type;
+};
+
+template <typename T, typename... Ts> struct nth_type<0, T, Ts...> {
+ using type = T;
+};
+
+template <int N, typename... Ts>
+using nth_type_t = typename nth_type<N, Ts...>::type;
+#endif
+/// @}
+
+template <typename T> T *declptr() { return static_cast<T *>(nullptr); }
+
+template <int N>
+static inline constexpr bool isValidDimensions = (N > 0) && (N < 4);
+
+/// Class provides helper functions for iteration space coordinates in kernel
+/// invocation on device.
+class Builder {
+public:
+ Builder() = delete;
+
+#ifdef __SYCL_DEVICE_ONLY__
+ /// \return a global index of work item currently being operated on by device.
+ template <int Dims> static const id<Dims> getElement(id<Dims> *) {
+ static_assert(isValidDimensions<Dims>, "invalid dimensions");
+ return __spirv::initBuiltInGlobalInvocationId<Dims, id<Dims>>();
+ }
+
+ /// Constructs item with the given data.
+ /// \param Extent a range representing the dimensions of the range of possible
+ /// values of the item.
+ /// \param Index a constituent id representing the work-item’s position in the
+ /// iteration space.
+ /// \param Offset an id representing the n-dimensional offset that should be
+ /// added to the global-ID of each work-item, if this item represents a global
+ /// range. Deprecated in SYCL 2020.
+ template <int Dims, bool WithOffset>
+ static std::enable_if_t<WithOffset, item<Dims, WithOffset>>
+ createItem(const range<Dims> &Extent, const id<Dims> &Index,
+ const id<Dims> &Offset) {
+ return item<Dims, WithOffset>(Extent, Index, Offset);
+ }
+
+ /// Constructs item with the given data.
+ /// \param Extent a range representing the dimensions of the range of possible
+ /// values of the item.
+ /// \param Index a constituent id representing the work-item’s position in the
+ /// iteration space.
+ template <int Dims, bool WithOffset>
+ static std::enable_if_t<!WithOffset, item<Dims, WithOffset>>
+ createItem(const range<Dims> &Extent, const id<Dims> &Index) {
+ return item<Dims, WithOffset>(Extent, Index);
+ }
+
+ /// Creates sycl::item instance for work item that is currently being operated
+ /// on.
+ template <int Dims, bool WithOffset>
+ static std::enable_if_t<WithOffset, const item<Dims, WithOffset>> getItem() {
+ static_assert(isValidDimensions<Dims>, "invalid dimensions");
+ id<Dims> GlobalId{__spirv::initBuiltInGlobalInvocationId<Dims, id<Dims>>()};
+ range<Dims> GlobalSize{__spirv::initBuiltInGlobalSize<Dims, range<Dims>>()};
+ id<Dims> GlobalOffset{__spirv::initBuiltInGlobalOffset<Dims, id<Dims>>()};
+ return createItem<Dims, true>(GlobalSize, GlobalId, GlobalOffset);
+ }
+
+ /// Creates sycl::item instance for work item that is currently being operated
+ /// on.
+ template <int Dims, bool WithOffset>
+ static std::enable_if_t<!WithOffset, const item<Dims, WithOffset>> getItem() {
+ static_assert(isValidDimensions<Dims>, "invalid dimensions");
+ id<Dims> GlobalId{__spirv::initBuiltInGlobalInvocationId<Dims, id<Dims>>()};
+ range<Dims> GlobalSize{__spirv::initBuiltInGlobalSize<Dims, range<Dims>>()};
+ return createItem<Dims, false>(GlobalSize, GlobalId);
+ }
+
+ /// \return a work item currently being operated on by device.
+ template <int Dims, bool WithOffset>
+ static auto getElement(item<Dims, WithOffset> *)
+ -> decltype(getItem<Dims, WithOffset>()) {
+ return getItem<Dims, WithOffset>();
+ }
+
+#endif // __SYCL_DEVICE_ONLY__
+};
+
+} // namespace detail
+
+_LIBSYCL_END_NAMESPACE_SYCL
+
+#endif // _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
diff --git a/libsycl/include/sycl/__impl/detail/unified_range_view.hpp b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp
index afa613fc8627b..8f321349d4c2e 100644
--- a/libsycl/include/sycl/__impl/detail/unified_range_view.hpp
+++ b/libsycl/include/sycl/__impl/detail/unified_range_view.hpp
@@ -17,6 +17,8 @@
#include <sycl/__impl/detail/config.hpp>
+#include <sycl/__impl/index_space_classes.hpp>
+
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
@@ -33,7 +35,9 @@ class UnifiedRangeView {
UnifiedRangeView &operator=(const UnifiedRangeView &Desc) = default;
UnifiedRangeView &operator=(UnifiedRangeView &&Desc) = default;
- // TODO: ctors with sycl::range and nd::range will be added later.
+ template <int Dims>
+ UnifiedRangeView(sycl::range<Dims> &N)
+ : MGlobalSize(&(N[0])), MDims(size_t(Dims)) {}
UnifiedRangeView(const size_t *GlobalSize, const size_t *LocalSize,
const size_t *Offset, size_t Dims)
diff --git a/libsycl/include/sycl/__impl/index_space_classes.hpp b/libsycl/include/sycl/__impl/index_space_classes.hpp
new file mode 100644
index 0000000000000..ef2897cee5307
--- /dev/null
+++ b/libsycl/include/sycl/__impl/index_space_classes.hpp
@@ -0,0 +1,413 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the SYCL 2020 ranges and index space
+/// identifiers (4.9.1.).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBSYCL___IMPL_INDEX_SPACE_CLASSES_HPP
+#define _LIBSYCL___IMPL_INDEX_SPACE_CLASSES_HPP
+
+#include <sycl/__impl/detail/config.hpp>
+
+_LIBSYCL_BEGIN_NAMESPACE_SYCL
+
+namespace detail {
+
+class Builder;
+
+/// Helper class for dimensions data management.
+template <int Dimensions = 1> class RawArray {
+ static_assert(Dimensions >= 1 && Dimensions <= 3,
+ "RawArray can only be 1, 2, or 3 Dimensional.");
+
+public:
+ /// Constructs one-dimensional instance and assign corresponding data to Dim0
+ /// value. Available only if Dimensions = 1.
+ template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true>
+ RawArray(size_t Dim0 = 0) : MArray{Dim0} {}
+
+ /// Constructs two-dimensional instance and assign corresponding data.
+ /// Available only if Dimensions = 2.
+ template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true>
+ RawArray(size_t Dim0, size_t Dim1) : MArray{Dim0, Dim1} {}
+
+ /// Constructs two-dimensional instance with zero-initialized corresponding
+ /// data. Available only if Dimensions = 2.
+ template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true>
+ RawArray() : RawArray(0, 0) {}
+
+ /// Constructs three-dimensional instance and assign corresponding data.
+ /// Available only if Dimensions = 3.
+ template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true>
+ RawArray(size_t Dim0, size_t Dim1, size_t Dim2) : MArray{Dim0, Dim1, Dim2} {}
+
+ /// Constructs three-dimensional instance with zero-initialized corresponding
+ /// data. Available only if Dimensions = 3.
+ template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true>
+ RawArray() : RawArray(0, 0, 0) {}
+
+ /// Returns value for the specified dimension.
+ /// Results in undefined behavior if dimension is not in the range [0,
+ /// Dimensions).
+ /// \param Dimension a dimension to query data for.
+ /// \return value in array matching requested dimension.
+ std::size_t get(int Dimension) const noexcept { return MArray[Dimension]; }
+
+ /// Returns value for the specified dimension.
+ /// Results in undefined behavior if dimension is not in the range [0,
+ /// Dimensions).
+ /// \param Dimension a dimension to query data for.
+ /// \return value in array matching requested dimension.
+ std::size_t &operator[](int Dimension) noexcept { return MArray[Dimension]; }
+
+ /// Returns value for the specified dimension.
+ /// Results in undefined behavior if dimension is not in the range [0,
+ /// Dimensions).
+ /// \param Dimension a dimension to query data for.
+ /// \return value in array matching requested dimension.
+ std::size_t operator[](int Dimension) const noexcept {
+ return MArray[Dimension];
+ }
+
+ RawArray(const RawArray<Dimensions> &rhs) = default;
+ RawArray(RawArray<Dimensions> &&rhs) = default;
+ RawArray<Dimensions> &operator=(const RawArray<Dimensions> &rhs) = default;
+ RawArray<Dimensions> &operator=(RawArray<Dimensions> &&rhs) = default;
+ ~RawArray() = default;
+
+ friend bool operator==(const RawArray<Dimensions> &lhs,
+ const RawArray<Dimensions> &rhs) {
+ for (int i = 0; i < Dimensions; ++i) {
+ if (lhs.MArray[i] != rhs.MArray[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ friend bool operator!=(const RawArray<Dimensions> &lhs,
+ const RawArray<Dimensions> &rhs) {
+ for (int i = 0; i < Dimensions; ++i) {
+ if (lhs.MArray[i] != rhs.MArray[i]) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+protected:
+ size_t MArray[Dimensions];
+};
+} // namespace detail
+
+/// SYCL 2020 4.9.1.1. range class.
+/// range<int Dimensions> is a 1D, 2D or 3D vector that defines the iteration
+/// domain of either a single work-group in a parallel dispatch, or the overall
+/// Dimensions of the dispatch.
+template <int Dimensions = 1>
+class range : public detail::RawArray<Dimensions> {
+ static_assert(Dimensions >= 1 && Dimensions <= 3,
+ "range can only be 1, 2, or 3 Dimensional.");
+ using Base = detail::RawArray<Dimensions>;
+
+public:
+ static constexpr int dimensions = Dimensions;
+ range() noexcept = default;
+ range(const range<Dimensions> &rhs) = default;
+ range(range<Dimensions> &&rhs) = default;
+ range<Dimensions> &operator=(const range<Dimensions> &rhs) = default;
+ range<Dimensions> &operator=(range<Dimensions> &&rhs) = default;
+
+ /// Construct a 1D range with value dim0.
+ /// Only valid when the template parameter Dimensions is equal to 1.
+ template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true>
+ range(std::size_t dim0) noexcept : Base(dim0) {}
+
+ /// Construct a 2D range with values dim0 and dim1.
+ /// Only valid when the template parameter Dimensions is equal to 2.
+ template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true>
+ range(std::size_t dim0, std::size_t dim1) noexcept : Base(dim0, dim1) {}
+
+ /// Construct a 3D range with values dim0, dim1 and dim2.
+ /// Only valid when the template parameter Dimensions is equal to 3.
+ template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true>
+ range(std::size_t dim0, std::size_t dim1, std::size_t dim2) noexcept
+ : Base(dim0, dim1, dim2) {}
+
+ /*
+ Declared and implemented in detail::RawArray:
+ std::size_t get(int dimension) const noexcept;
+ std::size_t& operator[](int dimension) noexcept;
+ std::size_t operator[](int dimension) const noexcept;
+ */
+
+ /// \return the size of the range computed as dimension0*…*dimensionN.
+ std::size_t size() const noexcept {
+ std::size_t size = 1;
+ for (int i = 0; i < Dimensions; ++i) {
+ size *= Base::MArray[i];
+ }
+ return size;
+ }
+
+ // TODO: operators to be added
+};
+
+/// c++ deduction guides.
+#ifdef __cpp_deduction_guides
+range(std::size_t) -> range<1>;
+range(std::size_t, std::size_t) -> range<2>;
+range(std::size_t, std::size_t, std::size_t) -> range<3>;
+#endif
+
+template <int Dimensions = 1, bool WithOffset = true> class item;
+
+/// SYCL 2020 4.9.1.3. id class.
+/// id<int Dimensions> is a vector of Dimensions that is used to represent an id
+/// into a global or local range. It can be used as an index in an accessor of
+/// the same rank.
+template <int Dimensions = 1> class id : public detail::RawArray<Dimensions> {
+ static_assert(Dimensions >= 1 && Dimensions <= 3,
+ "id can only be 1, 2, or 3 Dimensional.");
+ using Base = detail::RawArray<Dimensions>;
+
+ // Helper class for conversion operator. Void type is not suitable. User
+ // cannot even try to get address of the operator PrivateTag(). User
+ // may try to get an address of operator void() and will get the
+ // compile-time error
+ class PrivateTag;
+ template <bool Condition, typename T>
+ using EnableIfT = std::conditional_t<Condition, T, PrivateTag>;
+
+public:
+ static constexpr int dimensions = Dimensions;
+
+ id() noexcept = default;
+ id(const id<Dimensions> &rhs) = default;
+ id(id<Dimensions> &&rhs) = default;
+ id<Dimensions> &operator=(const id<Dimensions> &rhs) = default;
+ id<Dimensions> &operator=(id<Dimensions> &&rhs) = default;
+
+ /// Construct a 1D id with value dim0.
+ /// Only valid when the template parameter Dimensions is equal to 1.
+ template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true>
+ id(std::size_t dim0) noexcept : Base(dim0) {}
+
+ /// Construct a 2D id with values dim0, dim1.
+ /// Only valid when the template parameter Dimensions is equal to 2.
+ template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true>
+ id(std::size_t dim0, std::size_t dim1) noexcept : Base(dim0, dim1) {}
+
+ /// Construct a 3D id with values dim0, dim1, dim2.
+ /// Only valid when the template parameter Dimensions is equal to 3.
+ template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true>
+ id(std::size_t dim0, std::size_t dim1, std::size_t dim2) noexcept
+ : Base(dim0, dim1, dim2) {}
+
+ /// Construct an id from the dimensions of range.
+ /// Only valid when the template parameter Dimensions is equal to 1.
+ template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true>
+ id(const range<Dimensions> &range) noexcept : Base(range.get(0)) {}
+
+ /// Construct an id from the dimensions of range.
+ /// Only valid when the template parameter Dimensions is equal to 2.
+ template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true>
+ id(const range<Dimensions> &range) noexcept
+ : Base(range.get(0), range.get(1)) {}
+
+ /// Construct an id from the dimensions of range.
+ /// Only valid when the template parameter Dimensions is equal to 3.
+ template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true>
+ id(const range<Dimensions> &range) noexcept
+ : Base(range.get(0), range.get(1), range.get(2)) {}
+
+ /// Construct an id from item.get_id().
+ /// Only valid when the template parameter Dimensions is equal to 1.
+ template <int N = Dimensions, std::enable_if_t<N == 1, bool> = true>
+ id(const item<Dimensions> &item) noexcept : Base(item.get_id(0)) {}
+
+ /// Construct an id from item.get_id().
+ /// Only valid when the template parameter Dimensions is equal to 2.
+ template <int N = Dimensions, std::enable_if_t<N == 2, bool> = true>
+ id(const item<Dimensions> &item) noexcept
+ : Base(item.get_id(0), item.get_id(1)) {}
+
+ /// Construct an id from item.get_id().
+ /// Only valid when the template parameter Dimensions is equal to 3.
+ template <int N = Dimensions, std::enable_if_t<N == 3, bool> = true>
+ id(const item<Dimensions> &item) noexcept
+ : Base(item.get_id(0), item.get_id(1), item.get_id(2)) {}
+
+ /*
+ Declared and implemented in detail::RawArray:
+ std::size_t get(int dimension) const noexcept;
+ std::size_t& operator[](int dimension) noexcept;
+ std::size_t operator[](int dimension) const noexcept;
+ */
+
+ // Template operator is not allowed because it disables further type
+ // conversion. For example, the next code will not work in case of template
+ // conversion:
+ // int a = id<1>(value);
+ /// Returns the same value as get(0).
+ /// Available only when: Dimensions == 1.
+ operator EnableIfT<(Dimensions == 1), std::size_t>() const noexcept {
+ return Base::get(0);
+ }
+
+ // TODO: operators to be added
+};
+
+/// c++ deduction guides.
+#ifdef __cpp_deduction_guides
+id(std::size_t) -> id<1>;
+id(std::size_t, std::size_t) -> id<2>;
+id(std::size_t, std::size_t, std::size_t) -> id<3>;
+#endif
+
+/// SYCL 2020 4.9.1.4. item class.
+/// item identifies an instance of the function object executing at each point
+/// in a range.
+template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item {
+ /* Helper class for conversion operator. Void type is not suitable. User
+ * cannot even try to get address of the operator PrivateTag(). User
+ * may try to get an address of operator void() and will get the
+ * compile-time error */
+ class PrivateTag;
+ template <bool Condition, typename T>
+ using EnableIfT = std::conditional_t<Condition, T, PrivateTag>;
+
+public:
+ static constexpr int dimensions = Dimensions;
+
+ item() = delete;
+
+ item(const item &rhs) = default;
+
+ item(item<Dimensions, WithOffset> &&rhs) = default;
+
+ item &operator=(const item &rhs) = default;
+
+ item &operator=(item &&rhs) = default;
+
+ friend bool operator==(const item<Dimensions, WithOffset> &lhs,
+ const item<Dimensions, WithOffset> &rhs) {
+ if constexpr (WithOffset)
+ return (lhs.MId == rhs.MId) && (lhs.MRange == rhs.MRange) &&
+ (lhs.MOffset == rhs.MOffset);
+ else
+ return (lhs.MId == rhs.MId) && (lhs.MRange == rhs.MRange);
+ }
+
+ friend bool operator!=(const item<Dimensions, WithOffset> &lhs,
+ const item<Dimensions, WithOffset> &rhs) {
+ return !(lhs == rhs);
+ }
+
+ /// \return the constituent id representing the work-item’s position in the
+ /// iteration space.
+ id<Dimensions> get_id() const noexcept { return MId; }
+
+ /// Equivalent to return get_id()[dimension].
+ std::size_t get_id(int dimension) const noexcept {
+ return MId.get(dimension);
+ }
+
+ /// Equivalent to return get_id(dimension).
+ std::size_t operator[](int dimension) const noexcept {
+ return MId[dimension];
+ }
+
+ /// \return a range representing the dimensions of the range of possible
+ /// values of the item.
+ range<Dimensions> get_range() const noexcept { return MRange; }
+
+ /// Equivalent to return get_range().get(dimension).
+ std::size_t get_range(int dimension) const noexcept {
+ return MRange[dimension];
+ }
+
+ /// Deprecated in SYCL 2020.
+ /// For an item converted from an item with no offset this will always return
+ /// an id of all 0 values. This member function is only available if
+ /// WithOffset is true.
+ /// \return an id representing the n-dimensional offset provided to the
+ /// parallel_for and that is added by the runtime to the global-ID of each
+ /// work-item, if this item represents a global range.
+ template <bool HasOffset = WithOffset,
+ std::enable_if_t<HasOffset == true, bool> = true>
+ id<Dimensions> get_offset() const noexcept {
+ return MOffset;
+ }
+
+ /// Deprecated in SYCL 2020.
+ /// This conversion allow users to seamlessly write code that assumes an
+ /// offset and still provides an offset-less item. Available only when:
+ /// WithOffset == false.
+ /// \return an item representing the same information as the object holds but
+ /// also includes the offset set to 0.
+ template <bool HasOffset = WithOffset,
+ std::enable_if_t<HasOffset == false, bool> = true>
+ operator item<Dimensions, true>() const noexcept {
+ return item<Dimensions, true>(MRange, MId, id<Dimensions>{});
+ }
+
+ /// Equivalent to get_id(0).
+ /// Available only when: Dimensions == 1.
+ operator EnableIfT<(Dimensions == 1), std::size_t>() const noexcept {
+ return get_id(0);
+ }
+
+ /// \return Return the id as a linear index value.
+ std::size_t get_linear_id() const noexcept {
+ if constexpr (WithOffset) {
+ if constexpr (1 == Dimensions) {
+ return MId;
+ }
+ if constexpr (2 == Dimensions) {
+ return (MId[0] - MOffset[0]) * MRange[1] + (MId[1] - MOffset[1]);
+ }
+ return ((MId[0] - MOffset[0]) * MRange[1] * MRange[2]) +
+ ((MId[1] - MOffset[1]) * MRange[2]) + (MId[2] - MOffset[2]);
+ } else {
+ if constexpr (1 == Dimensions) {
+ return MId[0];
+ }
+ if constexpr (2 == Dimensions) {
+ return MId[0] * MRange[1] + MId[1];
+ }
+ return (MId[0] * MRange[1] * MRange[2]) + (MId[1] * MRange[2]) + MId[2];
+ }
+ }
+
+protected:
+ template <bool HasOffset = WithOffset,
+ std::enable_if_t<HasOffset == true, bool> = true>
+ item(const sycl::range<Dimensions> &range, const sycl::id<Dimensions> &id,
+ const sycl::id<Dimensions> &offset)
+ : MRange(range), MId(id), MOffset(offset) {}
+
+ template <bool HasOffset = WithOffset,
+ std::enable_if_t<HasOffset == false, bool> = true>
+ item(const range<Dimensions> &range, const id<Dimensions> &id)
+ : MRange(range), MId(id), MOffset() {}
+
+private:
+ range<Dimensions> MRange;
+ id<Dimensions> MId;
+ id<Dimensions> MOffset;
+
+ friend class detail::Builder;
+};
+
+_LIBSYCL_END_NAMESPACE_SYCL
+
+#endif // _LIBSYCL___IMPL_INDEX_SPACE_CLASSES_HPP
diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp
index d1ac320433c38..95653ab0c34ff 100644
--- a/libsycl/include/sycl/__impl/queue.hpp
+++ b/libsycl/include/sycl/__impl/queue.hpp
@@ -23,6 +23,7 @@
#include <sycl/__impl/detail/arg_wrapper.hpp>
#include <sycl/__impl/detail/config.hpp>
#include <sycl/__impl/detail/default_async_handler.hpp>
+#include <sycl/__impl/detail/kernel_arg_helpers.hpp>
#include <sycl/__impl/detail/obj_utils.hpp>
#include <sycl/__impl/detail/unified_range_view.hpp>
@@ -32,28 +33,6 @@ class context;
namespace detail {
class QueueImpl;
-
-template <typename, typename T> struct CheckFunctionSignature {
- static_assert(std::integral_constant<T, false>::value,
- "Second template parameter is required to be of function type");
-};
-
-template <typename F, typename RetT, typename... Args>
-struct CheckFunctionSignature<F, RetT(Args...)> {
-private:
- template <typename T>
- static constexpr auto check(T *) -> typename std::is_same<
- decltype(std::declval<T>().operator()(std::declval<Args>()...)),
- RetT>::type;
-
- template <typename> static constexpr std::false_type check(...);
-
- using type = decltype(check<F>(0));
-
-public:
- static constexpr bool value = type::value;
-};
-
} // namespace detail
// SYCL 2020 4.6.5. Queue class.
@@ -166,7 +145,7 @@ class _LIBSYCL_EXPORT queue {
///
/// \param kernelFunc is the kernel functor or lambda.
/// \return an event that represents the status of the submitted kernel.
- template <typename KernelName, typename KernelType>
+ template <typename KernelName = detail::AutoName, typename KernelType>
event single_task(const KernelType &kernelFunc) {
return single_task<KernelName, KernelType>({}, kernelFunc);
}
@@ -177,7 +156,7 @@ class _LIBSYCL_EXPORT queue {
/// \param depEvent is an event that specifies the kernel dependency.
/// \param kernelFunc is the kernel functor or lambda.
/// \return an event that represents the status of the submitted kernel.
- template <typename KernelName, typename KernelType>
+ template <typename KernelName = detail::AutoName, typename KernelType>
event single_task(event depEvent, const KernelType &kernelFunc) {
return single_task<KernelName, KernelType>({depEvent}, kernelFunc);
}
@@ -189,7 +168,7 @@ class _LIBSYCL_EXPORT queue {
/// dependencies.
/// \param kernelFunc is the kernel functor or lambda.
/// \return an event that represents the status of the submitted kernel.
- template <typename KernelName, typename KernelType>
+ template <typename KernelName = detail::AutoName, typename KernelType>
event single_task(const std::vector<event> &depEvents,
const KernelType &kernelFunc) {
static_assert(
@@ -199,18 +178,169 @@ class _LIBSYCL_EXPORT queue {
"group. ");
setKernelParameters(depEvents);
- submitSingleTask<KernelName, KernelType>(kernelFunc);
+ using NameT =
+ typename detail::get_kernel_name_t<KernelName, KernelType>::name;
+ submitSingleTask<NameT, KernelType>(kernelFunc);
return getLastEvent();
}
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<1> numWorkItems, Rest &&...rest) {
+ return parallel_for<KernelName>(numWorkItems, {},
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<2> numWorkItems, Rest &&...rest) {
+ return parallel_for<KernelName>(numWorkItems, {},
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<3> numWorkItems, Rest &&...rest) {
+ return parallel_for<KernelName>(numWorkItems, {},
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel.
+ /// \param depEvent adds a requirement that the action represented by depEvent
+ /// must complete before executing this kernel.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<1> numWorkItems, event depEvent, Rest &&...rest) {
+ return parallel_for<KernelName>(numWorkItems, {depEvent},
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel.
+ /// \param depEvent adds a requirement that the action represented by depEvent
+ /// must complete before executing this kernel.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<2> numWorkItems, event depEvent, Rest &&...rest) {
+ return parallel_for<KernelName>(numWorkItems, {depEvent},
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel.
+ /// \param depEvent adds a requirement that the action represented by depEvent
+ /// must complete before executing this kernel.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<3> numWorkItems, event depEvent, Rest &&...rest) {
+ return parallel_for<KernelName>(numWorkItems, {depEvent},
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel
+ /// \param depEvents is a vector of events that specifies the kernel
+ /// dependencies.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<1> numWorkItems, const std::vector<event> &depEvents,
+ Rest &&...rest) {
+ return parallelForImpl<KernelName>(numWorkItems, depEvents,
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel
+ /// \param depEvents is a vector of events that specifies the kernel
+ /// dependencies.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<2> numWorkItems, const std::vector<event> &depEvents,
+ Rest &&...rest) {
+ return parallelForImpl<KernelName>(numWorkItems, depEvents,
+ std::forward<Rest>(rest)...);
+ }
+
+ /// Defines and invokes a SYCL kernel function as a lambda expression or a
+ /// named function object type, for the specified range.
+ ///
+ /// \param numWorkItems specifies the global work space of the kernel
+ /// \param depEvents is a vector of events that specifies the kernel
+ /// dependencies.
+ /// \param rest acts as-if: const KernelType &KernelFunc".
+ // TODO: Rest will represent reduction types once it is supported.
+ template <typename KernelName = detail::AutoName, typename... Rest>
+ event parallel_for(range<3> numWorkItems, const std::vector<event> &depEvents,
+ Rest &&...rest) {
+ return parallelForImpl<KernelName>(numWorkItems, depEvents,
+ std::forward<Rest>(rest)...);
+ }
+
/// Blocks the calling thread until all commands previously submitted to this
/// queue have completed. Synchronous errors are reported through SYCL
/// exceptions.
void wait();
private:
- // Name of this function is defined by compiler. It generates call to this
- // function in the host implementation of KernelFunc in submitSingleTask.
+ template <typename KernelName, int Dims, typename... Rest>
+ event parallelForImpl(range<Dims> numWorkItems,
+ const std::vector<event> &depEvents, Rest &&...rest) {
+ if constexpr (sizeof...(Rest) != 1)
+ throw sycl::exception(errc::feature_not_supported,
+ "Reductions are not supported.");
+ setKernelParameters(depEvents, numWorkItems);
+
+ using KernelType =
+ std::decay_t<detail::nth_type_t<sizeof...(Rest) - 1, Rest...>>;
+ using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
+ static_assert(
+ std::is_convertible_v<sycl::item<Dims>, LambdaArgType>,
+ "Kernel argument of a sycl::parallel_for with sycl::range "
+ "must be either sycl::item or be convertible from sycl::item");
+
+ using NameT =
+ typename detail::get_kernel_name_t<KernelName, KernelType>::name;
+ submitParallelFor<NameT, item<Dims>, KernelType>(rest...);
+ return getLastEvent();
+ }
+
+ /// Name of this function is defined by compiler. It generates call to this
+ /// function in the host implementation of KernelFunc in submitSingleTask or
+ /// submitParallelFor.
+ /// \param KernelName a name of the kernel being invoked.
+ /// \param args kernel arguments for kernel invocation.
+ // TODO: now `args` always represents single argument - lambda capture.
template <typename, typename... Args>
void sycl_kernel_launch(const char *KernelName, Args &&...args) {
static_assert((sizeof...(args) == 1) &&
@@ -221,6 +351,10 @@ class _LIBSYCL_EXPORT queue {
submitKernelImpl(KernelName, TypelessArgs);
}
+ /// The sycl_kernel_entry_point attribute facilitates the generation of an
+ /// offload kernel entry point function with parameters corresponding to the
+ /// (potentially decomposed) kernel arguments and a body that (potentially
+ /// reconstructs the arguments and) executes the kernel.
#ifdef SYCL_LANGUAGE_VERSION
# define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName) \
[[clang::sycl_kernel_entry_point(KernelName)]]
@@ -228,18 +362,45 @@ class _LIBSYCL_EXPORT queue {
# define _LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
#endif // SYCL_LANGUAGE_VERSION
+ /// Specifies the parameters and body of the generated offload kernel entry
+ /// point for single_task invocations. On host compiler generates call to
+ /// sycl_kernel_launch instead of KernelFunc invocation.
template <typename KernelName, typename KernelType>
_LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
void submitSingleTask(const KernelType KernelFunc) {
KernelFunc();
}
- event getLastEvent();
- void submitKernelImpl(const char *KernelName,
- detail::ArgCollection &TypelessArgs);
+ /// Specifies the parameters and body of the generated offload kernel entry
+ /// point for parallel_for invocations. On host compiler generates call to
+ /// sycl_kernel_launch instead of KernelFunc invocation.
+ template <typename KernelName, typename ElementType, typename KernelType>
+ _LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
+ void submitParallelFor(const KernelType KernelFunc) {
+#ifdef __SYCL_DEVICE_ONLY__
+ KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
+#endif
+ (void)KernelFunc;
+ }
+
+ /// Passes kernel parameters to runtime.
+ /// \param Events a collection of events representing dependencies of the
+ /// kernel to submit.
+ /// \param Range a unified view of range for kernel execution.
void setKernelParameters(const std::vector<event> &Events,
const detail::UnifiedRangeView &Range = {});
+ /// Passes kernel arguments to runtime.
+ /// If all dependencies are met and kernel can be submitted to backend - it is
+ /// done in this call.
+ /// \param KernelName a name of the kernel being invoked.
+ /// \param TypelessArgs a unified arguments collection.
+ void submitKernelImpl(const char *KernelName,
+ detail::ArgCollection &TypelessArgs);
+
+ /// \return an event representing last kernel invocation.
+ event getLastEvent();
+
queue(const std::shared_ptr<detail::QueueImpl> &Impl) : impl(Impl) {}
std::shared_ptr<detail::QueueImpl> impl;
diff --git a/libsycl/include/sycl/__spirv/spirv_vars.hpp b/libsycl/include/sycl/__spirv/spirv_vars.hpp
new file mode 100644
index 0000000000000..ec8c691b35e92
--- /dev/null
+++ b/libsycl/include/sycl/__spirv/spirv_vars.hpp
@@ -0,0 +1,75 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains SPIRV builtins needed for kernel invocations
+/// (parallel_for).
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBSYCL___SPIRV_SPIRV_VARS
+#define _LIBSYCL___SPIRV_SPIRV_VARS
+
+#ifdef __SYCL_DEVICE_ONLY__
+
+# include <cstddef>
+# include <cstdint>
+
+// SPIR-V built-in variables mapped to function call.
+# define _LIBSYCL_SYCL_DEVICE_ATTR __attribute__((sycl_external))
+
+_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t
+__spirv_BuiltInGlobalInvocationId(int);
+_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t
+__spirv_BuiltInGlobalSize(int);
+_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t
+__spirv_BuiltInGlobalOffset(int);
+
+namespace __spirv {
+
+// Helper function templates to initialize and get vector component from SPIR-V
+// built-in variables
+# define __SPIRV_DEFINE_INIT_AND_GET_HELPERS(POSTFIX) \
+ template <int ID> size_t get##POSTFIX(); \
+ template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX(0); } \
+ template <> size_t get##POSTFIX<1>() { return __spirv_##POSTFIX(1); } \
+ template <> size_t get##POSTFIX<2>() { return __spirv_##POSTFIX(2); } \
+ \
+ template <int Dim, class DstT> struct InitSizesST##POSTFIX; \
+ \
+ template <class DstT> struct InitSizesST##POSTFIX<1, DstT> { \
+ static DstT initSize() { return {get##POSTFIX<0>()}; } \
+ }; \
+ \
+ template <class DstT> struct InitSizesST##POSTFIX<2, DstT> { \
+ static DstT initSize() { \
+ return {get##POSTFIX<1>(), get##POSTFIX<0>()}; \
+ } \
+ }; \
+ \
+ template <class DstT> struct InitSizesST##POSTFIX<3, DstT> { \
+ static DstT initSize() { \
+ return {get##POSTFIX<2>(), get##POSTFIX<1>(), get##POSTFIX<0>()}; \
+ } \
+ }; \
+ \
+ template <int Dims, class DstT> DstT init##POSTFIX() { \
+ return InitSizesST##POSTFIX<Dims, DstT>::initSize(); \
+ }
+
+__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalSize);
+__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalInvocationId)
+__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalOffset)
+
+# undef __SPIRV_DEFINE_INIT_AND_GET_HELPERS
+
+} // namespace __spirv
+
+#endif //__SYCL_DEVICE_ONLY__
+
+#endif // _LIBSYCL___SPIRV_SPIRV_VARS
diff --git a/libsycl/test/basic/queue_parallel_for_generic.cpp b/libsycl/test/basic/queue_parallel_for_generic.cpp
new file mode 100644
index 0000000000000..cac423b85f218
--- /dev/null
+++ b/libsycl/test/basic/queue_parallel_for_generic.cpp
@@ -0,0 +1,47 @@
+// REQUIRES: any-device
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %t.out
+
+#include <sycl/sycl.hpp>
+
+#include <cassert>
+#include <iostream>
+#include <type_traits>
+
+int main() {
+ // TODO: uncomment property once it is implemented. now all sycl::queue
+ // objects are in-order due to liboffload limitation. Test is intended to
+ // check in-order execution.
+ sycl::queue q{/*sycl::property::queue::in_order()*/};
+ auto dev = q.get_device();
+ auto ctx = q.get_context();
+ constexpr int N = 8;
+
+ auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx));
+
+ for (int i = 0; i < N; i++) {
+ A[i] = 1;
+ }
+
+ q.parallel_for<class Bar>(N, [=](auto i) {
+ static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+ "lambda arg type is unexpected");
+ A[i]++;
+ });
+
+ q.parallel_for<class Foo>({N}, [=](auto i) {
+ static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
+ "lambda arg type is unexpected");
+ A[i]++;
+ });
+
+ // TODO: add kernel with offset and kernel with nd_range once they
+ // are implemented.
+
+ q.wait();
+
+ for (int i = 0; i < N; i++) {
+ assert(A[i] == 3);
+ }
+ sycl::free(A, ctx);
+}
diff --git a/libsycl/test/basic/wrapped_usm_pointers.cpp b/libsycl/test/basic/wrapped_usm_pointers.cpp
new file mode 100644
index 0000000000000..16a86963cc976
--- /dev/null
+++ b/libsycl/test/basic/wrapped_usm_pointers.cpp
@@ -0,0 +1,111 @@
+// REQUIRES: any-device
+// RUN: %clangxx -fsycl %s -o %t.out
+// RUN: %t.out
+
+#include <sycl/sycl.hpp>
+
+#include <iostream>
+
+struct Simple {
+ int *Data;
+ int Addition;
+};
+
+struct WrapperOfSimple {
+ int Addition;
+ Simple Obj;
+};
+
+struct NonTrivial {
+ int Addition;
+ int *Data;
+
+ NonTrivial(int *D, int A) : Data(D), Addition(A) {}
+};
+
+struct NonTrivialDerived : NonTrivial {
+ int AA = 0;
+ NonTrivialDerived(int *D, int A) : NonTrivial(D, A) {}
+};
+
+using namespace sycl;
+
+int main() {
+ constexpr int NumOfElements = 7;
+
+ queue Q;
+
+ NonTrivial NonTrivialObj(sycl::malloc_shared<int>(NumOfElements, Q), 38);
+ NonTrivialDerived NonTrivialDerivedObj(
+ sycl::malloc_shared<int>(NumOfElements, Q), 39);
+ Simple SimpleObj = {sycl::malloc_shared<int>(NumOfElements, Q), 42};
+ WrapperOfSimple WrapperOfSimpleObj = {
+ 300, {sycl::malloc_shared<int>(NumOfElements, Q), 100500}};
+
+ // Test simple struct containing pointer.
+ Q.parallel_for(NumOfElements, [=](id<1> Idx) {
+ SimpleObj.Data[Idx] = Idx + SimpleObj.Addition;
+ });
+
+ // Test simple non-trivial struct containing pointer.
+ Q.parallel_for(NumOfElements, [=](id<1> Idx) {
+ NonTrivialObj.Data[Idx] = Idx + NonTrivialObj.Addition;
+ });
+
+ // Test simple non-trivial derived struct containing pointer.
+ Q.parallel_for(NumOfElements, [=](id<1> Idx) {
+ NonTrivialDerivedObj.Data[Idx] = Idx + NonTrivialDerivedObj.Addition;
+ });
+
+ // Test nested struct containing pointer.
+ Q.parallel_for(NumOfElements, [=](id<1> Idx) {
+ WrapperOfSimpleObj.Obj.Data[Idx] = Idx + WrapperOfSimpleObj.Obj.Addition;
+ });
+
+ // Test array of structs containing pointers.
+ Simple SimpleArr[NumOfElements];
+ for (int i = 0; i < NumOfElements; ++i) {
+ SimpleArr[i].Data = sycl::malloc_shared<int>(NumOfElements, Q);
+ SimpleArr[i].Addition = 38 + i;
+ }
+
+ Q.parallel_for(range<2>(NumOfElements, NumOfElements), [=](item<2> Idx) {
+ SimpleArr[Idx.get_id(0)].Data[Idx.get_id(1)] =
+ Idx.get_id(1) + SimpleArr[Idx.get_id(0)].Addition;
+ });
+
+ Q.wait();
+
+ auto Checker = [](auto Obj) {
+ for (int i = 0; i < NumOfElements; ++i) {
+ if (Obj.Data[i] != (i + Obj.Addition)) {
+ std::cout << "line: " << __LINE__ << " result[" << i << "] is "
+ << Obj.Data[i] << " expected " << i + Obj.Addition
+ << std::endl;
+ return true; // true if fail
+ }
+ }
+
+ return false;
+ };
+
+ bool Fail = false;
+ Fail = Checker(SimpleObj);
+ Fail = Checker(NonTrivialObj);
+ Fail = Checker(NonTrivialDerivedObj);
+ Fail = Checker(WrapperOfSimpleObj.Obj);
+
+ for (int i = 0; i < NumOfElements; ++i)
+ Fail = Checker(SimpleArr[i]);
+
+ // Free allocated memory.
+ sycl::free(NonTrivialObj.Data, Q);
+ sycl::free(NonTrivialDerivedObj.Data, Q);
+ sycl::free(SimpleObj.Data, Q);
+ sycl::free(WrapperOfSimpleObj.Obj.Data, Q);
+
+ for (int i = 0; i < NumOfElements; ++i)
+ sycl::free(SimpleArr[i].Data, Q);
+
+ return Fail;
+}
>From a5b6a4c40d367b6f80d7ec43915d94a0707aa911 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Fri, 27 Mar 2026 10:37:44 -0700
Subject: [PATCH 3/7] removed invalid comment
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
index d4a0ea9f63ff2..a7478e1300e21 100644
--- a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
+++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
@@ -5,8 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-// to add
-//===----------------------------------------------------------------------===//
#ifndef _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
#define _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
>From 60af5a987839fac6b250e7519a74fe33ef8ff3b4 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Tue, 21 Apr 2026 04:40:29 -0700
Subject: [PATCH 4/7] fix merge errors
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
.../sycl/__impl/detail/arg_wrapper.hpp | 135 ------------------
libsycl/include/sycl/__impl/queue.hpp | 43 ------
2 files changed, 178 deletions(-)
delete mode 100644 libsycl/include/sycl/__impl/detail/arg_wrapper.hpp
diff --git a/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp b/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp
deleted file mode 100644
index 96f60a3121787..0000000000000
--- a/libsycl/include/sycl/__impl/detail/arg_wrapper.hpp
+++ /dev/null
@@ -1,135 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file contains helper functions used to wrap kernel arguments to
-/// typeless collection.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP
-#define _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP
-
-#include <sycl/__impl/detail/config.hpp>
-#include <sycl/__impl/exception.hpp>
-
-#include <cassert>
-#include <memory>
-
-_LIBSYCL_BEGIN_NAMESPACE_SYCL
-
-namespace detail {
-
-/// Base class is needed for unification, we pass arguments through ABI
-/// boundary.
-class ArgWrapperBase {
-public:
- ArgWrapperBase(const ArgWrapperBase &) = delete;
- ArgWrapperBase &operator=(const ArgWrapperBase &) = delete;
- virtual ~ArgWrapperBase() = default;
-
- virtual void deepCopy() = 0;
- virtual size_t getSize() const = 0;
- virtual const void *getPtr() const = 0;
-
-protected:
- ArgWrapperBase() = default;
-};
-
-/// Helps to manage arguments in a typeless way.
-template <typename Type> class ArgWrapper : public ArgWrapperBase {
-public:
- ArgWrapper(Type &Arg) { Ptr = &Arg; }
- ArgWrapper(const ArgWrapper &) = delete;
- ArgWrapper &operator=(const ArgWrapper &) = delete;
-
- /// \return size of argument in bytes.
- size_t getSize() const override { return sizeof(Type); }
-
- /// Returns raw pointer to the corresponding argument.
- /// No copy is done by this method. It works with pointer to the memory whose
- /// existence must be guaranteed by class user or with copy that must be
- /// explicitly requested by class user via deepCopy method.
- /// \return pointer to the argument.
- const void *getPtr() const override {
- assert((!DeepCopy || (DeepCopy.get()) == Ptr) &&
- "Incorrect state of copied argument");
- return Ptr;
- }
-
- /// Copies agrument to RT owned storage.
- void deepCopy() override {
- if (DeepCopy)
- return;
-
- DeepCopy.reset(new Type(*Ptr));
- Ptr = DeepCopy.get();
- }
-
-private:
- Type *Ptr;
- std::unique_ptr<Type> DeepCopy;
-};
-
-/// Collection of arguments. Provides functionality to accumulate all arguments
-/// data to pass through ABI boundary.
-class ArgCollection {
-public:
- /// Adds argument to the collection. Don't own the memory. Argument lifetime
- /// must be guaranteed by class user. If extended lifetime is needed (copy),
- /// deepCopy must be called.
- template <typename Type> void addArg(Type &Arg) {
- MArgs.emplace_back(new ArgWrapper(Arg));
- }
-
- /// \return array of argument pointers.
- const void **getArgPtrArray() {
- if (MPtrs.size() != MArgs.size()) {
- MPtrs.clear();
- MPtrs.reserve(MArgs.size());
- auto it = MArgs.cbegin();
- while (it != MArgs.cend()) {
- MPtrs.push_back((*it++)->getPtr());
- }
- }
- return MPtrs.data();
- }
-
- /// \return array of argument sizes.
- int64_t *getSizesArray() {
- if (MSizes.size() != MArgs.size()) {
- MSizes.clear();
- MSizes.reserve(MArgs.size());
- auto it = MArgs.cbegin();
- while (it != MArgs.cend()) {
- MSizes.push_back(static_cast<int64_t>((*it++)->getSize()));
- }
- }
- return MSizes.data();
- }
-
- /// \return count of arguments in collection.
- size_t getArgCount() { return MArgs.size(); }
-
- /// Extends arguments lifetime by doing copy of all arguments.
- void deepCopy() {
- for (auto &Arg : MArgs)
- Arg->deepCopy();
- }
-
-private:
- std::vector<std::unique_ptr<ArgWrapperBase>> MArgs;
- std::vector<int64_t> MSizes;
- std::vector<const void *> MPtrs;
-};
-
-} // namespace detail
-
-_LIBSYCL_END_NAMESPACE_SYCL
-
-#endif // _LIBSYCL___IMPL_DETAIL_ARG_WRAPPER_HPP
diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp
index 5f31777c09cf7..e3856d2f5b4b6 100644
--- a/libsycl/include/sycl/__impl/queue.hpp
+++ b/libsycl/include/sycl/__impl/queue.hpp
@@ -20,7 +20,6 @@
#include <sycl/__impl/event.hpp>
#include <sycl/__impl/property_list.hpp>
-#include <sycl/__impl/detail/arg_wrapper.hpp>
#include <sycl/__impl/detail/config.hpp>
#include <sycl/__impl/detail/default_async_handler.hpp>
#include <sycl/__impl/detail/get_device_kernel_info.hpp>
@@ -311,48 +310,6 @@ class _LIBSYCL_EXPORT queue {
/// exceptions.
void wait();
- /// Defines and invokes a SYCL kernel function as a lambda expression or a
- /// named function object type.
- ///
- /// \param kernelFunc is the kernel functor or lambda.
- /// \return an event that represents the status of the submitted kernel.
- template <typename KernelName, typename KernelType>
- event single_task(const KernelType &kernelFunc) {
- return single_task<KernelName, KernelType>({}, kernelFunc);
- }
-
- /// Defines and invokes a SYCL kernel function as a lambda expression or a
- /// named function object type.
- ///
- /// \param depEvent is an event that specifies the kernel dependency.
- /// \param kernelFunc is the kernel functor or lambda.
- /// \return an event that represents the status of the submitted kernel.
- template <typename KernelName, typename KernelType>
- event single_task(event depEvent, const KernelType &kernelFunc) {
- return single_task<KernelName, KernelType>({depEvent}, kernelFunc);
- }
-
- /// Defines and invokes a SYCL kernel function as a lambda expression or a
- /// named function object type.
- ///
- /// \param depEvents is a collection of events that specify the kernel
- /// dependencies.
- /// \param kernelFunc is the kernel functor or lambda.
- /// \return an event that represents the status of the submitted kernel.
- template <typename KernelName, typename KernelType>
- event single_task(const std::vector<event> &depEvents,
- const KernelType &kernelFunc) {
- static_assert(
- detail::CheckFunctionSignature<std::remove_reference_t<KernelType>,
- void()>::value,
- "sycl::queue::single_task() requires a kernel instead of a command "
- "group");
-
- setKernelParameters(depEvents);
- submitSingleTask<KernelName, KernelType>(kernelFunc);
- return getLastEvent();
- }
-
private:
template <typename KernelName, int Dims, typename... Rest>
event parallelForImpl(range<Dims> numWorkItems,
>From 2861b1031d9c712c8c43c360b1fc88ab3ba388ec Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Tue, 21 Apr 2026 08:46:55 -0700
Subject: [PATCH 5/7] fix more comments and revert some merge issues
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
.../sycl/__impl/detail/kernel_arg_helpers.hpp | 11 ++++++++--
.../sycl/__impl/index_space_classes.hpp | 20 +++++++++----------
libsycl/include/sycl/__impl/queue.hpp | 20 +++++++++----------
libsycl/include/sycl/__spirv/spirv_vars.hpp | 10 +++-------
libsycl/include/sycl/sycl.hpp | 1 +
libsycl/src/detail/queue_impl.cpp | 1 +
libsycl/src/detail/queue_impl.hpp | 1 -
libsycl/test/basic/wrapped_usm_pointers.cpp | 10 +++++-----
8 files changed, 38 insertions(+), 36 deletions(-)
diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
index a7478e1300e21..f3d733981922a 100644
--- a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
+++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
@@ -5,6 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains helpers for kernel invocation.
+///
+//===----------------------------------------------------------------------===//
#ifndef _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
#define _LIBSYCL___IMPL_DETAIL_KERNEL_ARG_HELPERS
@@ -17,11 +22,13 @@
# include <sycl/__spirv/spirv_vars.hpp>
#endif
+#include <type_traits>
+
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
-/// \name Helpers for the unnamed lambda extension.
+/// \name Helpers for the unnamed lambda.
/// @{
/// This class is the default kernel name template parameter type for kernel
/// invocation APIs such as single_task.
@@ -35,7 +42,7 @@ template <typename Name, typename Type> struct get_kernel_name_t {
};
/// Specialization for the case when Name is undefined.
-/// This is only legal with our compiler with the unnamed lambda extension or if
+/// This is only legal with our compiler with the unnamed lambda support or if
/// the kernel is a functor object.
template <typename Type> struct get_kernel_name_t<detail::AutoName, Type> {
using name = Type;
diff --git a/libsycl/include/sycl/__impl/index_space_classes.hpp b/libsycl/include/sycl/__impl/index_space_classes.hpp
index ef2897cee5307..0dc8e90decc3d 100644
--- a/libsycl/include/sycl/__impl/index_space_classes.hpp
+++ b/libsycl/include/sycl/__impl/index_space_classes.hpp
@@ -17,6 +17,9 @@
#include <sycl/__impl/detail/config.hpp>
+#include <cstddef>
+#include <type_traits>
+
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
@@ -95,12 +98,7 @@ template <int Dimensions = 1> class RawArray {
friend bool operator!=(const RawArray<Dimensions> &lhs,
const RawArray<Dimensions> &rhs) {
- for (int i = 0; i < Dimensions; ++i) {
- if (lhs.MArray[i] != rhs.MArray[i]) {
- return true;
- }
- }
- return false;
+ return !(lhs == rhs);
}
protected:
@@ -370,13 +368,13 @@ template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item {
std::size_t get_linear_id() const noexcept {
if constexpr (WithOffset) {
if constexpr (1 == Dimensions) {
- return MId;
+ return MId[0] - MOffset[0];
}
if constexpr (2 == Dimensions) {
- return (MId[0] - MOffset[0]) * MRange[1] + (MId[1] - MOffset[1]);
+ return (MId[0] - MOffset[0]) * MRange[1] + MId[1] - MOffset[1];
}
- return ((MId[0] - MOffset[0]) * MRange[1] * MRange[2]) +
- ((MId[1] - MOffset[1]) * MRange[2]) + (MId[2] - MOffset[2]);
+ return (MId[0] - MOffset[0]) * MRange[1] * MRange[2] +
+ (MId[1] - MOffset[1]) * MRange[2] + MId[2] - MOffset[2];
} else {
if constexpr (1 == Dimensions) {
return MId[0];
@@ -384,7 +382,7 @@ template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item {
if constexpr (2 == Dimensions) {
return MId[0] * MRange[1] + MId[1];
}
- return (MId[0] * MRange[1] * MRange[2]) + (MId[1] * MRange[2]) + MId[2];
+ return MId[0] * MRange[1] * MRange[2] + MId[1] * MRange[2] + MId[2];
}
}
diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp
index e3856d2f5b4b6..ea96ee03da5ee 100644
--- a/libsycl/include/sycl/__impl/queue.hpp
+++ b/libsycl/include/sycl/__impl/queue.hpp
@@ -138,6 +138,11 @@ class _LIBSYCL_EXPORT queue {
template <typename Param>
typename Param::return_type get_backend_info() const;
+ /// Blocks the calling thread until all commands previously submitted to this
+ /// queue have completed. Synchronous errors are reported through SYCL
+ /// exceptions.
+ void wait();
+
/// Defines and invokes a SYCL kernel function as a lambda expression or a
/// named function object type.
///
@@ -172,8 +177,8 @@ class _LIBSYCL_EXPORT queue {
static_assert(
(detail::CheckFunctionSignature<std::remove_reference_t<KernelType>,
void()>::value),
- "sycl::queue::single_task() requires a kernel instead of command "
- "group. ");
+ "sycl::queue::single_task() requires a kernel instead of a command "
+ "group");
setKernelParameters(depEvents);
using NameT =
@@ -305,18 +310,13 @@ class _LIBSYCL_EXPORT queue {
std::forward<Rest>(rest)...);
}
- /// Blocks the calling thread until all commands previously submitted to this
- /// queue have completed. Synchronous errors are reported through SYCL
- /// exceptions.
- void wait();
-
private:
template <typename KernelName, int Dims, typename... Rest>
event parallelForImpl(range<Dims> numWorkItems,
const std::vector<event> &depEvents, Rest &&...rest) {
if constexpr (sizeof...(Rest) != 1)
throw sycl::exception(errc::feature_not_supported,
- "Reductions are not supported.");
+ "Reductions are not supported");
setKernelParameters(depEvents, numWorkItems);
using KernelType =
@@ -366,7 +366,7 @@ class _LIBSYCL_EXPORT queue {
/// sycl_kernel_launch instead of KernelFunc invocation.
template <typename KernelName, typename KernelType>
_LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
- void submitSingleTask(const KernelType KernelFunc) {
+ void submitSingleTask(const KernelType &KernelFunc) {
KernelFunc();
}
@@ -375,7 +375,7 @@ class _LIBSYCL_EXPORT queue {
/// sycl_kernel_launch instead of KernelFunc invocation.
template <typename KernelName, typename ElementType, typename KernelType>
_LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
- void submitParallelFor(const KernelType KernelFunc) {
+ void submitParallelFor(const KernelType &KernelFunc) {
#ifdef __SYCL_DEVICE_ONLY__
KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
#endif
diff --git a/libsycl/include/sycl/__spirv/spirv_vars.hpp b/libsycl/include/sycl/__spirv/spirv_vars.hpp
index ec8c691b35e92..2c93e510565b3 100644
--- a/libsycl/include/sycl/__spirv/spirv_vars.hpp
+++ b/libsycl/include/sycl/__spirv/spirv_vars.hpp
@@ -21,14 +21,10 @@
# include <cstdint>
// SPIR-V built-in variables mapped to function call.
-# define _LIBSYCL_SYCL_DEVICE_ATTR __attribute__((sycl_external))
-_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t
-__spirv_BuiltInGlobalInvocationId(int);
-_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t
-__spirv_BuiltInGlobalSize(int);
-_LIBSYCL_SYCL_DEVICE_ATTR __attribute__((const)) size_t
-__spirv_BuiltInGlobalOffset(int);
+__attribute__((const)) size_t __spirv_BuiltInGlobalInvocationId(int);
+__attribute__((const)) size_t __spirv_BuiltInGlobalSize(int);
+__attribute__((const)) size_t __spirv_BuiltInGlobalOffset(int);
namespace __spirv {
diff --git a/libsycl/include/sycl/sycl.hpp b/libsycl/include/sycl/sycl.hpp
index ce9fc8defd90b..7e81d952bd41c 100644
--- a/libsycl/include/sycl/sycl.hpp
+++ b/libsycl/include/sycl/sycl.hpp
@@ -19,6 +19,7 @@
#include <sycl/__impl/device_selector.hpp>
#include <sycl/__impl/event.hpp>
#include <sycl/__impl/exception.hpp>
+#include <sycl/__impl/index_space_classes.hpp>
#include <sycl/__impl/platform.hpp>
#include <sycl/__impl/queue.hpp>
#include <sycl/__impl/usm_functions.hpp>
diff --git a/libsycl/src/detail/queue_impl.cpp b/libsycl/src/detail/queue_impl.cpp
index 93a1f43d25bf6..623b326637932 100644
--- a/libsycl/src/detail/queue_impl.cpp
+++ b/libsycl/src/detail/queue_impl.cpp
@@ -20,6 +20,7 @@ namespace detail {
static void setKernelLaunchArgs(const detail::UnifiedRangeView &Range,
ol_kernel_launch_size_args_t &ArgsToSet) {
+ assert(Range.MDims < 4 && "Invalid dimensions.");
uint32_t GlobalSize[3] = {1, 1, 1};
if (Range.MGlobalSize) {
for (size_t I = 0; I < Range.MDims; ++I) {
diff --git a/libsycl/src/detail/queue_impl.hpp b/libsycl/src/detail/queue_impl.hpp
index a504c467e3927..8800464e96612 100644
--- a/libsycl/src/detail/queue_impl.hpp
+++ b/libsycl/src/detail/queue_impl.hpp
@@ -21,7 +21,6 @@
#include <OffloadAPI.h>
#include <memory>
-#include <mutex>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
namespace detail {
diff --git a/libsycl/test/basic/wrapped_usm_pointers.cpp b/libsycl/test/basic/wrapped_usm_pointers.cpp
index 16a86963cc976..c936dcada4a6b 100644
--- a/libsycl/test/basic/wrapped_usm_pointers.cpp
+++ b/libsycl/test/basic/wrapped_usm_pointers.cpp
@@ -90,13 +90,13 @@ int main() {
};
bool Fail = false;
- Fail = Checker(SimpleObj);
- Fail = Checker(NonTrivialObj);
- Fail = Checker(NonTrivialDerivedObj);
- Fail = Checker(WrapperOfSimpleObj.Obj);
+ Fail |= Checker(SimpleObj);
+ Fail |= Checker(NonTrivialObj);
+ Fail |= Checker(NonTrivialDerivedObj);
+ Fail |= Checker(WrapperOfSimpleObj.Obj);
for (int i = 0; i < NumOfElements; ++i)
- Fail = Checker(SimpleArr[i]);
+ Fail |= Checker(SimpleArr[i]);
// Free allocated memory.
sycl::free(NonTrivialObj.Data, Q);
>From b05c6d1017482699ef865187d0c679a870777676 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Wed, 22 Apr 2026 09:12:44 -0700
Subject: [PATCH 6/7] fix comments
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
.../sycl/__impl/detail/kernel_arg_helpers.hpp | 7 +-
libsycl/include/sycl/__impl/queue.hpp | 13 ++-
libsycl/include/sycl/__spirv/spirv_vars.hpp | 46 ++++-----
libsycl/test/basic/parallel_for_indexers.cpp | 98 +++++++++++++++++++
4 files changed, 127 insertions(+), 37 deletions(-)
create mode 100644 libsycl/test/basic/parallel_for_indexers.cpp
diff --git a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
index f3d733981922a..d58df91f19465 100644
--- a/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
+++ b/libsycl/include/sycl/__impl/detail/kernel_arg_helpers.hpp
@@ -18,9 +18,7 @@
#include <sycl/__impl/detail/config.hpp>
-#ifdef __SYCL_DEVICE_ONLY__
-# include <sycl/__spirv/spirv_vars.hpp>
-#endif
+#include <sycl/__spirv/spirv_vars.hpp>
#include <type_traits>
@@ -121,7 +119,6 @@ class Builder {
public:
Builder() = delete;
-#ifdef __SYCL_DEVICE_ONLY__
/// \return a global index of work item currently being operated on by device.
template <int Dims> static const id<Dims> getElement(id<Dims> *) {
static_assert(isValidDimensions<Dims>, "invalid dimensions");
@@ -181,8 +178,6 @@ class Builder {
-> decltype(getItem<Dims, WithOffset>()) {
return getItem<Dims, WithOffset>();
}
-
-#endif // __SYCL_DEVICE_ONLY__
};
} // namespace detail
diff --git a/libsycl/include/sycl/__impl/queue.hpp b/libsycl/include/sycl/__impl/queue.hpp
index ea96ee03da5ee..776cbabfe9c8e 100644
--- a/libsycl/include/sycl/__impl/queue.hpp
+++ b/libsycl/include/sycl/__impl/queue.hpp
@@ -323,13 +323,19 @@ class _LIBSYCL_EXPORT queue {
std::decay_t<detail::nth_type_t<sizeof...(Rest) - 1, Rest...>>;
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
static_assert(
- std::is_convertible_v<sycl::item<Dims>, LambdaArgType>,
+ std::is_convertible_v<sycl::item<Dims>, LambdaArgType> ||
+ std::is_convertible_v<sycl::item<Dims, false>, LambdaArgType>,
"Kernel argument of a sycl::parallel_for with sycl::range "
"must be either sycl::item or be convertible from sycl::item");
+ using TranformedLambdaArgType = std::conditional_t<
+ std::is_convertible_v<item<Dims>, LambdaArgType>, item<Dims>,
+ std::conditional_t<
+ std::is_convertible_v<item<Dims, false>, LambdaArgType>,
+ item<Dims, false>, LambdaArgType>>;
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
- submitParallelFor<NameT, item<Dims>, KernelType>(rest...);
+ submitParallelFor<NameT, TranformedLambdaArgType, KernelType>(rest...);
return getLastEvent();
}
@@ -376,10 +382,7 @@ class _LIBSYCL_EXPORT queue {
template <typename KernelName, typename ElementType, typename KernelType>
_LIBSYCL_ENTRY_POINT_ATTR__(KernelName)
void submitParallelFor(const KernelType &KernelFunc) {
-#ifdef __SYCL_DEVICE_ONLY__
KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>()));
-#endif
- (void)KernelFunc;
}
#undef _LIBSYCL_ENTRY_POINT_ATTR__
diff --git a/libsycl/include/sycl/__spirv/spirv_vars.hpp b/libsycl/include/sycl/__spirv/spirv_vars.hpp
index 2c93e510565b3..c8d2c990d76c3 100644
--- a/libsycl/include/sycl/__spirv/spirv_vars.hpp
+++ b/libsycl/include/sycl/__spirv/spirv_vars.hpp
@@ -15,8 +15,6 @@
#ifndef _LIBSYCL___SPIRV_SPIRV_VARS
#define _LIBSYCL___SPIRV_SPIRV_VARS
-#ifdef __SYCL_DEVICE_ONLY__
-
# include <cstddef>
# include <cstdint>
@@ -30,33 +28,31 @@ namespace __spirv {
// Helper function templates to initialize and get vector component from SPIR-V
// built-in variables
-# define __SPIRV_DEFINE_INIT_AND_GET_HELPERS(POSTFIX) \
- template <int ID> size_t get##POSTFIX(); \
- template <> size_t get##POSTFIX<0>() { return __spirv_##POSTFIX(0); } \
- template <> size_t get##POSTFIX<1>() { return __spirv_##POSTFIX(1); } \
- template <> size_t get##POSTFIX<2>() { return __spirv_##POSTFIX(2); } \
+#define __SPIRV_DEFINE_INIT_AND_GET_HELPERS(POSTFIX) \
+ template <int ID> size_t get##POSTFIX(); \
+ template <> inline size_t get##POSTFIX<0>() { return __spirv_##POSTFIX(0); } \
+ template <> inline size_t get##POSTFIX<1>() { return __spirv_##POSTFIX(1); } \
+ template <> inline size_t get##POSTFIX<2>() { return __spirv_##POSTFIX(2); } \
\
- template <int Dim, class DstT> struct InitSizesST##POSTFIX; \
+ template <int Dim, class DstT> struct InitSizesST##POSTFIX; \
\
- template <class DstT> struct InitSizesST##POSTFIX<1, DstT> { \
- static DstT initSize() { return {get##POSTFIX<0>()}; } \
- }; \
+ template <class DstT> struct InitSizesST##POSTFIX<1, DstT> { \
+ static DstT initSize() { return {get##POSTFIX<0>()}; } \
+ }; \
\
- template <class DstT> struct InitSizesST##POSTFIX<2, DstT> { \
- static DstT initSize() { \
- return {get##POSTFIX<1>(), get##POSTFIX<0>()}; \
- } \
- }; \
+ template <class DstT> struct InitSizesST##POSTFIX<2, DstT> { \
+ static DstT initSize() { return {get##POSTFIX<1>(), get##POSTFIX<0>()}; } \
+ }; \
\
- template <class DstT> struct InitSizesST##POSTFIX<3, DstT> { \
- static DstT initSize() { \
- return {get##POSTFIX<2>(), get##POSTFIX<1>(), get##POSTFIX<0>()}; \
- } \
- }; \
+ template <class DstT> struct InitSizesST##POSTFIX<3, DstT> { \
+ static DstT initSize() { \
+ return {get##POSTFIX<2>(), get##POSTFIX<1>(), get##POSTFIX<0>()}; \
+ } \
+ }; \
\
- template <int Dims, class DstT> DstT init##POSTFIX() { \
- return InitSizesST##POSTFIX<Dims, DstT>::initSize(); \
- }
+ template <int Dims, class DstT> DstT init##POSTFIX() { \
+ return InitSizesST##POSTFIX<Dims, DstT>::initSize(); \
+ }
__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalSize);
__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalInvocationId)
@@ -66,6 +62,4 @@ __SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalOffset)
} // namespace __spirv
-#endif //__SYCL_DEVICE_ONLY__
-
#endif // _LIBSYCL___SPIRV_SPIRV_VARS
diff --git a/libsycl/test/basic/parallel_for_indexers.cpp b/libsycl/test/basic/parallel_for_indexers.cpp
new file mode 100644
index 0000000000000..e9cef87e7472b
--- /dev/null
+++ b/libsycl/test/basic/parallel_for_indexers.cpp
@@ -0,0 +1,98 @@
+// REQUIRES: any-device
+// RUN: %clangxx -fsycl -Wno-error=deprecated-declarations %s -o %t.out
+// RUN: %t.out
+
+#include <sycl/sycl.hpp>
+
+#include <cassert>
+#include <memory>
+
+using namespace sycl;
+
+// TODO: original test works with buffers, revert changes to USM once they are
+// implemented.
+// TODO add cases with dimensions more than 1
+int main() {
+ bool Fail{};
+
+ constexpr size_t DataSize = 10;
+ const range<1> globalRange(6);
+ // Id indexer
+ {
+ queue Q;
+ int *Data = sycl::malloc_shared<int>(DataSize, Q);
+ for (size_t i = 0; i < DataSize; ++i)
+ Data[i] = -1;
+
+ Q.parallel_for<class id1>(globalRange,
+ [=](id<1> index) { Data[index] = index[0]; });
+ Q.wait();
+
+ for (size_t i = 0; i < DataSize; i++) {
+ const int id = Data[i];
+ if (i < globalRange[0]) {
+ Fail |= !(id == i);
+ } else {
+ Fail |= !(id == -1);
+ }
+ }
+
+ free(Data, Q);
+ }
+ // print and return;
+
+ // Item indexer without offset
+ {
+ // TODO: replace strcut with sycl::int2 once implemented.
+ struct DoubleInt {
+ int First;
+ int Second;
+ };
+ queue Q;
+ DoubleInt *Data = sycl::malloc_shared<DoubleInt>(DataSize, Q);
+ for (size_t i = 0; i < DataSize; ++i)
+ Data[i] = {-1, -1};
+
+ Q.parallel_for<class item1_nooffset>(
+ globalRange, [=](item<1, false> index) {
+ Data[index.get_id()] = {int(index.get_id()[0]),
+ int(index.get_range()[0])};
+ });
+ Q.wait();
+ for (size_t i = 0; i < DataSize; ++i) {
+ const int id = Data[i].First;
+ const int range = Data[i].Second;
+ if (i < globalRange[0]) {
+ Fail |= !(id == i);
+ Fail |= !(range == globalRange[0]);
+ } else {
+ Fail |= !(id == -1);
+ Fail |= !(range == -1);
+ }
+ }
+ free(Data, Q);
+ }
+
+ // get_linear_id()
+ {
+ queue Q;
+ size_t DataSize3D = DataSize * DataSize * DataSize;
+ int *Data = sycl::malloc_shared<int>(DataSize3D, Q);
+ Q.parallel_for(range<3>(DataSize, DataSize, DataSize), [=](item<3> Idx) {
+ auto Id = Idx.get_linear_id();
+ Data[Id] = Id;
+ });
+ Q.wait();
+ for (size_t i = 0; i < DataSize3D; ++i) {
+ Fail |= !(Data[i] == i);
+ }
+ free(Data, Q);
+ }
+
+ // TODO: Item indexer with offset
+ // blocked by liboffload support
+ // blocked by absence of sycl::handler implementation
+
+ // TODO: add nd_item check
+ return Fail;
+}
>From 51cd8902bba241421f4ee188e3f944c7f5c0aa94 Mon Sep 17 00:00:00 2001
From: "Tikhomirova, Kseniya" <kseniya.tikhomirova at intel.com>
Date: Thu, 23 Apr 2026 04:05:03 -0700
Subject: [PATCH 7/7] fix tests
Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova at intel.com>
---
.../sycl/__impl/index_space_classes.hpp | 3 +-
libsycl/include/sycl/__spirv/spirv_vars.hpp | 6 +-
libsycl/test/basic/parallel_for_indexers.cpp | 75 ++++++++-----------
.../test/basic/queue_parallel_for_generic.cpp | 34 ++++++---
4 files changed, 61 insertions(+), 57 deletions(-)
diff --git a/libsycl/include/sycl/__impl/index_space_classes.hpp b/libsycl/include/sycl/__impl/index_space_classes.hpp
index 0dc8e90decc3d..d47f803886235 100644
--- a/libsycl/include/sycl/__impl/index_space_classes.hpp
+++ b/libsycl/include/sycl/__impl/index_space_classes.hpp
@@ -19,6 +19,7 @@
#include <cstddef>
#include <type_traits>
+#include <variant>
_LIBSYCL_BEGIN_NAMESPACE_SYCL
@@ -401,7 +402,7 @@ template <int Dimensions /* = 1*/, bool WithOffset /* = true*/> class item {
private:
range<Dimensions> MRange;
id<Dimensions> MId;
- id<Dimensions> MOffset;
+ std::conditional_t<WithOffset, id<Dimensions>, std::monostate> MOffset;
friend class detail::Builder;
};
diff --git a/libsycl/include/sycl/__spirv/spirv_vars.hpp b/libsycl/include/sycl/__spirv/spirv_vars.hpp
index c8d2c990d76c3..450f581d9506d 100644
--- a/libsycl/include/sycl/__spirv/spirv_vars.hpp
+++ b/libsycl/include/sycl/__spirv/spirv_vars.hpp
@@ -15,8 +15,8 @@
#ifndef _LIBSYCL___SPIRV_SPIRV_VARS
#define _LIBSYCL___SPIRV_SPIRV_VARS
-# include <cstddef>
-# include <cstdint>
+#include <cstddef>
+#include <cstdint>
// SPIR-V built-in variables mapped to function call.
@@ -58,7 +58,7 @@ __SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalSize);
__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalInvocationId)
__SPIRV_DEFINE_INIT_AND_GET_HELPERS(BuiltInGlobalOffset)
-# undef __SPIRV_DEFINE_INIT_AND_GET_HELPERS
+#undef __SPIRV_DEFINE_INIT_AND_GET_HELPERS
} // namespace __spirv
diff --git a/libsycl/test/basic/parallel_for_indexers.cpp b/libsycl/test/basic/parallel_for_indexers.cpp
index e9cef87e7472b..078508f53fff5 100644
--- a/libsycl/test/basic/parallel_for_indexers.cpp
+++ b/libsycl/test/basic/parallel_for_indexers.cpp
@@ -11,12 +11,11 @@ using namespace sycl;
// TODO: original test works with buffers, revert changes to USM once they are
// implemented.
-// TODO add cases with dimensions more than 1
int main() {
bool Fail{};
constexpr size_t DataSize = 10;
- const range<1> globalRange(6);
+ const range<1> GlobalRange(6);
// Id indexer
{
queue Q;
@@ -24,29 +23,31 @@ int main() {
for (size_t i = 0; i < DataSize; ++i)
Data[i] = -1;
- Q.parallel_for<class id1>(globalRange,
- [=](id<1> index) { Data[index] = index[0]; });
+ Q.parallel_for<class id1>(GlobalRange,
+ [=](id<1> Index) { Data[Index] = Index[0]; });
Q.wait();
- for (size_t i = 0; i < DataSize; i++) {
- const int id = Data[i];
- if (i < globalRange[0]) {
- Fail |= !(id == i);
- } else {
- Fail |= !(id == -1);
+ Fail |= [&]() {
+ for (size_t i = 0; i < DataSize; ++i) {
+ const int ExpectedVal = i < GlobalRange[0] ? i : -1;
+ if (Data[i] != ExpectedVal) {
+ std::cout << "line: " << __LINE__ << " Data[" << i << "] is "
+ << Data[i] << " expected " << ExpectedVal << std::endl;
+ return true;
+ }
}
- }
+ return false;
+ }();
free(Data, Q);
}
- // print and return;
// Item indexer without offset
{
// TODO: replace strcut with sycl::int2 once implemented.
struct DoubleInt {
- int First;
- int Second;
+ int Id;
+ int Range;
};
queue Q;
DoubleInt *Data = sycl::malloc_shared<DoubleInt>(DataSize, Q);
@@ -54,38 +55,26 @@ int main() {
Data[i] = {-1, -1};
Q.parallel_for<class item1_nooffset>(
- globalRange, [=](item<1, false> index) {
- Data[index.get_id()] = {int(index.get_id()[0]),
- int(index.get_range()[0])};
+ GlobalRange, [=](item<1, false> Index) {
+ Data[Index.get_id()] = {int(Index.get_id()[0]),
+ int(Index.get_range()[0])};
});
Q.wait();
- for (size_t i = 0; i < DataSize; ++i) {
- const int id = Data[i].First;
- const int range = Data[i].Second;
- if (i < globalRange[0]) {
- Fail |= !(id == i);
- Fail |= !(range == globalRange[0]);
- } else {
- Fail |= !(id == -1);
- Fail |= !(range == -1);
- }
- }
- free(Data, Q);
- }
- // get_linear_id()
- {
- queue Q;
- size_t DataSize3D = DataSize * DataSize * DataSize;
- int *Data = sycl::malloc_shared<int>(DataSize3D, Q);
- Q.parallel_for(range<3>(DataSize, DataSize, DataSize), [=](item<3> Idx) {
- auto Id = Idx.get_linear_id();
- Data[Id] = Id;
- });
- Q.wait();
- for (size_t i = 0; i < DataSize3D; ++i) {
- Fail |= !(Data[i] == i);
- }
+ Fail |= [&]() {
+ for (size_t i = 0; i < DataSize; ++i) {
+ const int ExpectedValID = i < GlobalRange[0] ? i : -1;
+ const int ExpectedValRange = i < GlobalRange[0] ? GlobalRange[0] : -1;
+ if (Data[i].Id != ExpectedValID || Data[i].Range != ExpectedValRange) {
+ std::cout << "line: " << __LINE__ << " Data[" << i << "] is {"
+ << Data[i].Id << ", " << Data[i].Range << "} expected {"
+ << ExpectedValID << ", " << ExpectedValRange << "}"
+ << std::endl;
+ return true;
+ }
+ }
+ return false;
+ }();
free(Data, Q);
}
diff --git a/libsycl/test/basic/queue_parallel_for_generic.cpp b/libsycl/test/basic/queue_parallel_for_generic.cpp
index cac423b85f218..70a191e6ab220 100644
--- a/libsycl/test/basic/queue_parallel_for_generic.cpp
+++ b/libsycl/test/basic/queue_parallel_for_generic.cpp
@@ -12,36 +12,50 @@ int main() {
// TODO: uncomment property once it is implemented. now all sycl::queue
// objects are in-order due to liboffload limitation. Test is intended to
// check in-order execution.
- sycl::queue q{/*sycl::property::queue::in_order()*/};
- auto dev = q.get_device();
- auto ctx = q.get_context();
+ sycl::queue Q{/*sycl::property::queue::in_order()*/};
+ auto Dev = Q.get_device();
+ auto Ctx = Q.get_context();
constexpr int N = 8;
- auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), dev, ctx));
+ auto A = static_cast<int *>(sycl::malloc_shared(N * sizeof(int), Dev, Ctx));
- for (int i = 0; i < N; i++) {
+ for (int i = 0; i < N; ++i) {
A[i] = 1;
}
- q.parallel_for<class Bar>(N, [=](auto i) {
+ Q.parallel_for<class IntRange>(N, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});
- q.parallel_for<class Foo>({N}, [=](auto i) {
+ Q.parallel_for<class InitRange>({N}, [=](auto i) {
static_assert(std::is_same<decltype(i), sycl::item<1>>::value,
"lambda arg type is unexpected");
A[i]++;
});
+ Q.parallel_for<class InitRange2D>({4, 2}, [=](auto i) {
+ static_assert(std::is_same<decltype(i), sycl::item<2>>::value,
+ "lambda arg type is unexpected");
+ A[i.get_linear_id()]++;
+ });
+
+ Q.parallel_for<class InitRange3D>({2, 2, 2}, [=](auto i) {
+ static_assert(std::is_same<decltype(i), sycl::item<3>>::value,
+ "lambda arg type is unexpected");
+ A[i.get_linear_id()]++;
+ });
+
// TODO: add kernel with offset and kernel with nd_range once they
// are implemented.
- q.wait();
+ Q.wait();
+ bool Fail{};
for (int i = 0; i < N; i++) {
- assert(A[i] == 3);
+ Fail |= !(A[i] == 5);
}
- sycl::free(A, ctx);
+ sycl::free(A, Ctx);
+ return Fail;
}
More information about the llvm-branch-commits
mailing list