[llvm-branch-commits] [llvm] [openmp] [OpenMP][Offload] Add offload runtime support for dyn_groupprivate clause (PR #152831)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Aug 8 22:48:57 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-offload
Author: Kevin Sala Penades (kevinsala)
Part 2: adds offload runtime support for the `dyn_groupprivate` clause. See https://github.com/llvm/llvm-project/pull/152651.
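For context, the user-facing entry points this patch wires up can be exercised roughly like the sketch below. This is a minimal illustration based on the declarations visible in the diff (`omp_get_groupprivate_limit`, `omp_get_dyn_groupprivate_ptr`, `omp_get_dyn_groupprivate_size`, `omp_access_cgroup`); the exact `omp.h` prototypes may differ, and requesting the dynamic memory itself goes through the `dyn_groupprivate` clause handled in the companion PR.

```cpp
#include <omp.h>
#include <cstdio>

int main() {
  int Dev = omp_get_default_device();

  // Host side: upper bound on the group-shared memory any team may use.
  size_t Limit = omp_get_groupprivate_limit(Dev, omp_access_cgroup);
  std::printf("groupprivate limit on device %d: %zu bytes\n", Dev, Limit);

#pragma omp target device(Dev)
  {
    int IsFallback = 0;
    // Device side: base pointer and size of this team's dynamic buffer.
    void *Buf = omp_get_dyn_groupprivate_ptr(/*Offset=*/0, &IsFallback,
                                             omp_access_cgroup);
    size_t Size = omp_get_dyn_groupprivate_size(omp_access_cgroup);
    // Size is 0 when no dynamic cgroup memory was requested for this launch;
    // IsFallback reports whether the buffer lives in global-memory fallback
    // storage rather than native shared memory.
    if (Size != 0)
      ((char *)Buf)[0] = 0; // use Buf[0..Size) as team-local scratch
  }
  return 0;
}
```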
---
Patch is 39.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152831.diff
23 Files Affected:
- (modified) offload/DeviceRTL/include/DeviceTypes.h (+4)
- (modified) offload/DeviceRTL/include/Interface.h (+1-1)
- (modified) offload/DeviceRTL/include/State.h (+1-1)
- (modified) offload/DeviceRTL/src/Kernel.cpp (+7-7)
- (modified) offload/DeviceRTL/src/State.cpp (+46-2)
- (modified) offload/include/Shared/APITypes.h (+4-2)
- (modified) offload/include/Shared/Environment.h (+3-1)
- (modified) offload/include/device.h (+3)
- (modified) offload/include/omptarget.h (+6-1)
- (modified) offload/libomptarget/OpenMP/API.cpp (+14)
- (modified) offload/libomptarget/device.cpp (+6)
- (modified) offload/libomptarget/exports (+1)
- (modified) offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h (+1)
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+22-12)
- (modified) offload/plugins-nextgen/common/include/PluginInterface.h (+29-4)
- (modified) offload/plugins-nextgen/common/src/PluginInterface.cpp (+65-21)
- (modified) offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h (+1)
- (modified) offload/plugins-nextgen/cuda/src/rtl.cpp (+26-11)
- (modified) offload/plugins-nextgen/host/src/rtl.cpp (+2-2)
- (added) offload/test/offloading/dyn_groupprivate_strict.cpp (+141)
- (modified) openmp/runtime/src/include/omp.h.var (+10)
- (modified) openmp/runtime/src/kmp_csupport.cpp (+9)
- (modified) openmp/runtime/src/kmp_stub.cpp (+16)
``````````diff
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 2e5d92380f040..a43b506d6879e 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -163,4 +163,8 @@ typedef enum omp_allocator_handle_t {
///}
+enum omp_access_t {
+ omp_access_cgroup = 0,
+};
+
#endif
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4..672afea206785 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -222,7 +222,7 @@ struct KernelEnvironmentTy;
int8_t __kmpc_is_spmd_exec_mode();
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
void __kmpc_target_deinit();
diff --git a/offload/DeviceRTL/include/State.h b/offload/DeviceRTL/include/State.h
index db396dae6e445..17c3c6f2d3e42 100644
--- a/offload/DeviceRTL/include/State.h
+++ b/offload/DeviceRTL/include/State.h
@@ -116,7 +116,7 @@ extern Local<ThreadStateTy **> ThreadStates;
/// Initialize the state machinery. Must be called by all threads.
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment);
/// Return the kernel and kernel launch environment associated with the current
/// kernel. The former is static and contains compile time information that
diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp
index 467e44a65276c..58e9a09105a76 100644
--- a/offload/DeviceRTL/src/Kernel.cpp
+++ b/offload/DeviceRTL/src/Kernel.cpp
@@ -34,8 +34,8 @@ enum OMPTgtExecModeFlags : unsigned char {
};
static void
-inititializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+initializeRuntime(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
// Order is important here.
synchronize::init(IsSPMD);
mapping::init(IsSPMD);
@@ -80,17 +80,17 @@ extern "C" {
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ KernelLaunchEnvironmentTy *KernelLaunchEnvironment) {
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
bool IsSPMD = Configuration.ExecMode & OMP_TGT_EXEC_MODE_SPMD;
bool UseGenericStateMachine = Configuration.UseGenericStateMachine;
if (IsSPMD) {
- inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
- KernelLaunchEnvironment);
+ initializeRuntime(/*IsSPMD=*/true, KernelEnvironment,
+ KernelLaunchEnvironment);
synchronize::threadsAligned(atomic::relaxed);
} else {
- inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
- KernelLaunchEnvironment);
+ initializeRuntime(/*IsSPMD=*/false, KernelEnvironment,
+ KernelLaunchEnvironment);
// No need to wait since only the main threads will execute user
// code and workers will run into a barrier right away.
}
diff --git a/offload/DeviceRTL/src/State.cpp b/offload/DeviceRTL/src/State.cpp
index 62b03e7bba720..9e2a9999167b4 100644
--- a/offload/DeviceRTL/src/State.cpp
+++ b/offload/DeviceRTL/src/State.cpp
@@ -158,6 +158,34 @@ void SharedMemorySmartStackTy::pop(void *Ptr, uint64_t Bytes) {
memory::freeGlobal(Ptr, "Slow path shared memory deallocation");
}
+struct DynCGroupMemTy {
+ void init(KernelLaunchEnvironmentTy *KLE, void *NativeDynCGroup) {
+ Size = 0;
+ Ptr = nullptr;
+ IsFallback = false;
+ if (KLE) {
+ Size = KLE->DynCGroupMemSize;
+ if (void *Fallback = KLE->DynCGroupMemFallback) {
+ Ptr = static_cast<char *>(Fallback) + Size * omp_get_team_num();
+ IsFallback = true;
+ } else {
+ Ptr = static_cast<char *>(NativeDynCGroup);
+ }
+ }
+ }
+
+ char *getPtr(size_t Offset) const { return Ptr + Offset; }
+ bool isFallback() const { return IsFallback; }
+ size_t getSize() const { return Size; }
+
+private:
+ char *Ptr;
+ size_t Size;
+ bool IsFallback;
+};
+
+[[clang::loader_uninitialized]] static Local<DynCGroupMemTy> DynCGroupMem;
+
} // namespace
void *memory::getDynamicBuffer() { return DynamicSharedBuffer; }
@@ -246,13 +274,18 @@ int returnValIfLevelIsActive(int Level, int Val, int DefaultVal,
} // namespace
void state::init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment) {
+ KernelLaunchEnvironmentTy *KLE) {
SharedMemorySmartStack.init(IsSPMD);
+
+ if (KLE == reinterpret_cast<KernelLaunchEnvironmentTy *>(~0))
+ KLE = nullptr;
+
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
+ DynCGroupMem.init(KLE, DynamicSharedBuffer);
TeamState.init(IsSPMD);
ThreadStates = nullptr;
KernelEnvironmentPtr = &KernelEnvironment;
- KernelLaunchEnvironmentPtr = &KernelLaunchEnvironment;
+ KernelLaunchEnvironmentPtr = KLE;
}
}
@@ -430,6 +463,17 @@ int omp_get_team_num() { return mapping::getBlockIdInKernel(); }
int omp_get_initial_device(void) { return -1; }
int omp_is_initial_device(void) { return 0; }
+
+void *omp_get_dyn_groupprivate_ptr(size_t Offset, int *IsFallback,
+ omp_access_t) {
+ if (IsFallback != NULL)
+ *IsFallback = DynCGroupMem.isFallback();
+ return DynCGroupMem.getPtr(Offset);
+}
+
+size_t omp_get_dyn_groupprivate_size(omp_access_t) {
+ return DynCGroupMem.getSize();
+}
}
extern "C" {
diff --git a/offload/include/Shared/APITypes.h b/offload/include/Shared/APITypes.h
index 978b53d5d69b9..0ef2dd162292b 100644
--- a/offload/include/Shared/APITypes.h
+++ b/offload/include/Shared/APITypes.h
@@ -97,8 +97,10 @@ struct KernelArgsTy {
struct {
uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA.
- uint64_t Unused : 62;
- } Flags = {0, 0, 0};
+ uint64_t AllowDynCGroupMemFallback : 1; // Allow fallback for dynamic cgroup
+ // mem fallback.
+ uint64_t Unused : 61;
+ } Flags = {0, 0, 0, 0};
// The number of teams (for x,y,z dimension).
uint32_t NumTeams[3] = {0, 0, 0};
// The number of threads (for x,y,z dimension).
diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h
index 2a283bd6fa4ed..0670ac1090da4 100644
--- a/offload/include/Shared/Environment.h
+++ b/offload/include/Shared/Environment.h
@@ -93,9 +93,11 @@ struct KernelEnvironmentTy {
};
struct KernelLaunchEnvironmentTy {
+ void *ReductionBuffer = nullptr;
+ void *DynCGroupMemFallback = nullptr;
uint32_t ReductionCnt = 0;
uint32_t ReductionIterCnt = 0;
- void *ReductionBuffer = nullptr;
+ uint32_t DynCGroupMemSize = 0;
};
#endif // OMPTARGET_SHARED_ENVIRONMENT_H
diff --git a/offload/include/device.h b/offload/include/device.h
index f4b10abbaa3fd..0e93cf8ec1a8b 100644
--- a/offload/include/device.h
+++ b/offload/include/device.h
@@ -158,6 +158,9 @@ struct DeviceTy {
/// Indicate that there are pending images for this device or not.
void setHasPendingImages(bool V) { HasPendingImages = V; }
+ /// Get the maximum shared memory per team for any kernel.
+ uint64_t getMaxSharedTeamMemory();
+
private:
/// Deinitialize the device (and plugin).
void deinit();
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 6971780c7bdb5..45bb74ec367d6 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -107,7 +107,7 @@ enum TargetAllocTy : int32_t {
inline KernelArgsTy CTorDTorKernelArgs = {1, 0, nullptr, nullptr,
nullptr, nullptr, nullptr, nullptr,
- 0, {0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
+ 0, {0,0,0,0}, {1, 0, 0}, {1, 0, 0}, 0};
struct DeviceTy;
@@ -273,10 +273,15 @@ struct __tgt_target_non_contig {
extern "C" {
#endif
+typedef enum {
+ omp_access_cgroup = 0,
+} omp_access_t;
+
void ompx_dump_mapping_tables(void);
int omp_get_num_devices(void);
int omp_get_device_num(void);
int omp_get_initial_device(void);
+size_t omp_get_groupprivate_limit(int device_num, omp_access_t access_group = omp_access_cgroup);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..1ed4192157fc8 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -98,6 +98,20 @@ EXTERN int omp_get_initial_device(void) {
return HostDevice;
}
+EXTERN size_t omp_get_groupprivate_limit(int DeviceNum,
+ omp_access_t AccessGroup) {
+ TIMESCOPE();
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ if (DeviceNum == omp_get_initial_device())
+ return 0;
+
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+ return DeviceOrErr->getMaxSharedTeamMemory();
+}
+
EXTERN void *omp_target_alloc(size_t Size, int DeviceNum) {
TIMESCOPE_WITH_DETAILS("dst_dev=" + std::to_string(DeviceNum) +
";size=" + std::to_string(Size));
diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp
index f88e30ae9e76b..31bfc7d092424 100644
--- a/offload/libomptarget/device.cpp
+++ b/offload/libomptarget/device.cpp
@@ -281,3 +281,9 @@ bool DeviceTy::useAutoZeroCopy() {
return false;
return RTL->use_auto_zero_copy(RTLDeviceID);
}
+
+uint64_t DeviceTy::getMaxSharedTeamMemory() {
+ using DeviceQueryKind = llvm::omp::target::plugin::DeviceQueryKind;
+ return RTL->query_device_info(
+ RTLDeviceID, DeviceQueryKind::DEVICE_QUERY_MAX_SHARED_TEAM_MEM);
+}
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..b5a1401564d58 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -40,6 +40,7 @@ VERS1.0 {
omp_get_num_devices;
omp_get_device_num;
omp_get_initial_device;
+ omp_get_groupprivate_limit;
omp_target_alloc;
omp_target_free;
omp_target_is_present;
diff --git a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
index 3117763e35896..2cf156e576c5f 100644
--- a/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
+++ b/offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
@@ -52,6 +52,7 @@ typedef enum {
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE = 6,
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT = 7,
HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL = 15,
+ HSA_AMD_MEMORY_POOL_INFO_ALLOC_MAX_SIZE = 16,
} hsa_amd_memory_pool_info_t;
typedef enum {
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 12c7cc62905c9..fa373c2029f0c 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -273,7 +273,6 @@ struct AMDGPUMemoryPoolTy {
if (auto Err = getAttr(HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, GlobalFlags))
return Err;
-
return Plugin::success();
}
@@ -543,6 +542,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
return Err;
}
+ StaticBlockMemSize = GroupSize;
+
// Make sure it is a kernel symbol.
if (SymbolType != HSA_SYMBOL_KIND_KERNEL)
return Plugin::error(ErrorCode::INVALID_BINARY,
@@ -566,8 +567,8 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// Launch the AMDGPU kernel function.
Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads[3],
- uint32_t NumBlocks[3], KernelArgsTy &KernelArgs,
- KernelLaunchParamsTy LaunchParams,
+ uint32_t NumBlocks[3], uint32_t DynBlockMemSize,
+ KernelArgsTy &KernelArgs, KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const override;
/// Print more elaborate kernel launch info for AMDGPU
@@ -2020,6 +2021,20 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (auto Err = checkIfAPU())
return Err;
+ // Retrieve the size of the group memory.
+ for (const auto *Pool : AllMemoryPools) {
+ if (Pool->isGroup()) {
+ size_t Size = 0;
+ if (auto Err = Pool->getAttr(HSA_AMD_MEMORY_POOL_INFO_SIZE, Size))
+ return Err;
+ MaxBlockSharedMemSize = Size;
+ break;
+ }
+ }
+
+ // Supports block shared memory natively.
+ HasNativeBlockSharedMem = true;
+
return Plugin::success();
}
@@ -2856,7 +2871,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
KernelArgsTy KernelArgs = {};
uint32_t NumBlocksAndThreads[3] = {1u, 1u, 1u};
if (auto Err = AMDGPUKernel.launchImpl(
- *this, NumBlocksAndThreads, NumBlocksAndThreads, KernelArgs,
+ *this, NumBlocksAndThreads, NumBlocksAndThreads, 0, KernelArgs,
KernelLaunchParamsTy{}, AsyncInfoWrapper))
return Err;
@@ -3357,6 +3372,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads[3], uint32_t NumBlocks[3],
+ uint32_t DynBlockMemSize,
KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
@@ -3374,13 +3390,6 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
return Err;
- // Account for user requested dynamic shared memory.
- uint32_t GroupSize = getGroupSize();
- if (uint32_t MaxDynCGroupMem = std::max(
- KernelArgs.DynCGroupMem, GenericDevice.getDynamicMemorySize())) {
- GroupSize += MaxDynCGroupMem;
- }
-
uint64_t StackSize;
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
return Err;
@@ -3434,7 +3443,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
- GroupSize, StackSize, ArgsMemoryManager);
+ getStaticBlockMemSize() + DynBlockMemSize,
+ StackSize, ArgsMemoryManager);
}
Error AMDGPUKernelTy::printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h
index 162b149ab483e..3357ccfe0c9b5 100644
--- a/offload/plugins-nextgen/common/include/PluginInterface.h
+++ b/offload/plugins-nextgen/common/include/PluginInterface.h
@@ -226,6 +226,10 @@ struct InfoTreeNode {
}
};
+enum class DeviceQueryKind {
+ DEVICE_QUERY_MAX_SHARED_TEAM_MEM = 0,
+};
+
/// Class wrapping a __tgt_device_image and its offload entry table on a
/// specific device. This class is responsible for storing and managing
/// the offload entries for an image on a device.
@@ -312,13 +316,16 @@ struct GenericKernelTy {
AsyncInfoWrapperTy &AsyncInfoWrapper) const;
virtual Error launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads[3], uint32_t NumBlocks[3],
- KernelArgsTy &KernelArgs,
+ uint32_t DynBlockMemSize, KernelArgsTy &KernelArgs,
KernelLaunchParamsTy LaunchParams,
AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0;
/// Get the kernel name.
const char *getName() const { return Name.c_str(); }
+ /// Get the size of the static per-block memory consumed by the kernel.
+ uint32_t getStaticBlockMemSize() const { return StaticBlockMemSize; };
+
/// Get the kernel image.
DeviceImageTy &getImage() const {
assert(ImagePtr && "Kernel is not initialized!");
@@ -331,9 +338,9 @@ struct GenericKernelTy {
}
/// Return a device pointer to a new kernel launch environment.
- Expected<KernelLaunchEnvironmentTy *>
- getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice, uint32_t Version,
- AsyncInfoWrapperTy &AsyncInfo) const;
+ Expected<KernelLaunchEnvironmentTy *> getKernelLaunchEnvironment(
+ GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
+ void *FallbackBlockMem, AsyncInfoWrapperTy &AsyncInfo) const;
/// Indicate whether an execution mode is valid.
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
@@ -425,6 +432,9 @@ struct GenericKernelTy {
/// The maximum number of threads which the kernel could leverage.
uint32_t MaxNumThreads;
+ /// The static memory sized per block.
+ uint32_t StaticBlockMemSize = 0;
+
/// The kernel environment, including execution flags.
KernelEnvironmentTy KernelEnvironment;
@@ -731,6 +741,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
/// this id is not unique between different plugins; they may overlap.
int32_t getDeviceId() const { return DeviceId; }
+ /// Get the total shared memory per block that can be used in any kernel.
+ uint32_t getMaxBlockSharedMemSize() const { return MaxBlockSharedMemSize; }
+
+ /// Indicate whether the device has native block shared memory.
+ bool hasNativeBlockSharedMem() const { return HasNativeBlockSharedMem; }
+
/// Set the context of the device if needed, before calling device-specific
/// functions. Plugins may implement this function as a no-op if not needed.
virtual Error setContext() = 0;
@@ -1132,6 +1148,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
std::atomic<bool> OmptInitialized;
#endif
+ /// The total per-block shared memory that a kernel may use.
+ uint32_t MaxBlockSharedMemSize = 0;
+
+ /// Whether the device has native block shared memory.
+ bool HasNativeBlockSharedMem = false;
+
private:
DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
@@ -1347,6 +1369,9 @@ struct GenericPluginTy {
/// Prints information about the given devices supported by the plugin.
void print_device_info(int32_t DeviceId);
+ /// Retrieve information about the given device.
+ int64_t query_device_info(int32_t DeviceId, DeviceQueryKind Query);
+
/// Creates an event in the given plugin if supported.
int32_t create_event(int32_t DeviceId, void **EventPtr);
diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp
index 81b9d423e13d8..2997585e1660f 100644
--- a/offload/plugins-nextgen/common/src/PluginInterface.cpp
+++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp
@@ -477,20 +477,20 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
Expected<KernelLaunchEnvironmentTy *>
GenericKernelTy::getKernelLaunchEnvironment(
- GenericDeviceTy &GenericDevice, uint32_t Version,
- AsyncInfoWrapperTy &AsyncInfoWrapper) const {
+ GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs,
+ void *FallbackBlockMem, AsyncInfoWrapperTy &AsyncInfoWrapper) const {
// Ctor/Dtor have no arguments, replaying uses the original kernel launch
// environment. Older versions of the compiler do not generate a kernel
// launch environment.
if (GenericDevice.Plugin.getRecordReplay().isReplaying() ||
- Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
+ KernelArgs.Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR)
return nullptr;
- if (!KernelEnvironment....
[truncated]
``````````
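One detail worth highlighting from the State.cpp hunk above: when the global-memory fallback is used, each team addresses a disjoint slice of a single allocation, at `FallbackBase + DynCGroupMemSize * omp_get_team_num()`. A standalone illustration of that addressing (not the runtime's code, just the arithmetic it performs):

```cpp
#include <cstddef>
#include <cstdio>

// Mirrors the per-team slicing in DynCGroupMemTy::init: team N starts at
// FallbackBase + DynCGroupMemSize * N, so team slices never overlap.
static char *teamSlice(char *FallbackBase, size_t DynCGroupMemSize, int TeamNum) {
  return FallbackBase + DynCGroupMemSize * TeamNum;
}

int main() {
  constexpr size_t SizePerTeam = 1024; // e.g. 1 KiB requested per team
  char Buffer[4 * SizePerTeam];        // fallback allocation for 4 teams
  for (int Team = 0; Team < 4; ++Team)
    std::printf("team %d slice starts at offset %zu\n", Team,
                (size_t)(teamSlice(Buffer, SizePerTeam, Team) - Buffer));
  return 0;
}
```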
https://github.com/llvm/llvm-project/pull/152831