[llvm] [OpenMP] Add explicit attributes to every function declaration (PR #122399)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 17:41:07 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
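Instead of relying on the file-scoped `#pragma omp assumes ext_no_call_asm`, attach the `ext_no_call_asm` assumption explicitly to every function declaration through a new `OMP_ATTRS` macro.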
---
Patch is 149.69 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/122399.diff
28 Files Affected:
- (modified) offload/DeviceRTL/CMakeLists.txt (+1-1)
- (modified) offload/DeviceRTL/include/Allocator.h (+6-5)
- (modified) offload/DeviceRTL/include/Configuration.h (+13-13)
- (modified) offload/DeviceRTL/include/Debug.h (+6-5)
- (modified) offload/DeviceRTL/include/DeviceTypes.h (+3-3)
- (modified) offload/DeviceRTL/include/DeviceUtils.h (+12-9)
- (modified) offload/DeviceRTL/include/Interface.h (+117-113)
- (modified) offload/DeviceRTL/include/LibC.h (+3-3)
- (modified) offload/DeviceRTL/include/Mapping.h (+22-22)
- (modified) offload/DeviceRTL/include/Profiling.h (+5-3)
- (modified) offload/DeviceRTL/include/State.h (+50-45)
- (modified) offload/DeviceRTL/include/Synchronization.h (+27-26)
- (modified) offload/DeviceRTL/include/Workshare.h (+3-1)
- (modified) offload/DeviceRTL/src/Allocator.cpp (+8-5)
- (modified) offload/DeviceRTL/src/Configuration.cpp (+15-13)
- (modified) offload/DeviceRTL/src/Debug.cpp (+7-5)
- (modified) offload/DeviceRTL/src/DeviceUtils.cpp (+39-28)
- (modified) offload/DeviceRTL/src/Kernel.cpp (+7-6)
- (modified) offload/DeviceRTL/src/LibC.cpp (+9-8)
- (modified) offload/DeviceRTL/src/Mapping.cpp (+72-69)
- (modified) offload/DeviceRTL/src/Misc.cpp (+20-16)
- (modified) offload/DeviceRTL/src/Parallelism.cpp (+22-16)
- (modified) offload/DeviceRTL/src/Profiling.cpp (+3-3)
- (modified) offload/DeviceRTL/src/Reduction.cpp (+27-27)
- (modified) offload/DeviceRTL/src/State.cpp (+94-77)
- (modified) offload/DeviceRTL/src/Synchronization.cpp (+114-90)
- (modified) offload/DeviceRTL/src/Tasking.cpp (+27-25)
- (modified) offload/DeviceRTL/src/Workshare.cpp (+117-99)
``````````diff
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 099634e211e7a7..e6859ab3d9e9e3 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -98,7 +98,7 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
${clang_opt_flags} --offload-device-only
-nocudalib -nogpulib -nogpuinc -nostdlibinc
- -fopenmp -fopenmp-cuda-mode
+ -fopenmp -fopenmp-cuda-mode -Wno-unknown-assumption
-Wno-unknown-cuda-version -Wno-openmp-target
-DOMPTARGET_DEVICE_RUNTIME
-I${include_directory}
diff --git a/offload/DeviceRTL/include/Allocator.h b/offload/DeviceRTL/include/Allocator.h
index 475f6a21bb47eb..d3ff7185bb29bb 100644
--- a/offload/DeviceRTL/include/Allocator.h
+++ b/offload/DeviceRTL/include/Allocator.h
@@ -26,22 +26,23 @@ namespace allocator {
static uint64_t constexpr ALIGNMENT = 16;
/// Initialize the allocator according to \p KernelEnvironment
-void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
+OMP_ATTRS void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
/// Allocate \p Size bytes.
-[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
+[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT),
+ gnu::malloc]] OMP_ATTRS void *
alloc(uint64_t Size);
/// Free the allocation pointed to by \p Ptr.
-void free(void *Ptr);
+OMP_ATTRS void free(void *Ptr);
} // namespace allocator
} // namespace ompx
extern "C" {
-[[gnu::weak]] void *malloc(size_t Size);
-[[gnu::weak]] void free(void *Ptr);
+[[gnu::weak]] OMP_ATTRS void *malloc(size_t Size);
+[[gnu::weak]] OMP_ATTRS void free(void *Ptr);
}
#pragma omp end declare target
diff --git a/offload/DeviceRTL/include/Configuration.h b/offload/DeviceRTL/include/Configuration.h
index f8b7a6c3c6c9da..cf638838a7d382 100644
--- a/offload/DeviceRTL/include/Configuration.h
+++ b/offload/DeviceRTL/include/Configuration.h
@@ -22,45 +22,45 @@ namespace config {
/// Return the number of devices in the system, same number as returned on the
/// host by omp_get_num_devices.
-uint32_t getNumDevices();
+OMP_ATTRS uint32_t getNumDevices();
/// Return the device number in the system for omp_get_device_num.
-uint32_t getDeviceNum();
+OMP_ATTRS uint32_t getDeviceNum();
/// Return the user choosen debug level.
-uint32_t getDebugKind();
+OMP_ATTRS uint32_t getDebugKind();
/// Return if teams oversubscription is assumed
-uint32_t getAssumeTeamsOversubscription();
+OMP_ATTRS uint32_t getAssumeTeamsOversubscription();
/// Return if threads oversubscription is assumed
-uint32_t getAssumeThreadsOversubscription();
+OMP_ATTRS uint32_t getAssumeThreadsOversubscription();
/// Return the amount of dynamic shared memory that was allocated at launch.
-uint64_t getDynamicMemorySize();
+OMP_ATTRS uint64_t getDynamicMemorySize();
/// Returns the cycles per second of the device's fixed frequency clock.
-uint64_t getClockFrequency();
+OMP_ATTRS uint64_t getClockFrequency();
/// Returns the pointer to the beginning of the indirect call table.
-void *getIndirectCallTablePtr();
+OMP_ATTRS void *getIndirectCallTablePtr();
/// Returns the size of the indirect call table.
-uint64_t getIndirectCallTableSize();
+OMP_ATTRS uint64_t getIndirectCallTableSize();
/// Returns the size of the indirect call table.
-uint64_t getHardwareParallelism();
+OMP_ATTRS uint64_t getHardwareParallelism();
/// Return if debugging is enabled for the given debug kind.
-bool isDebugMode(DeviceDebugKind Level);
+OMP_ATTRS bool isDebugMode(DeviceDebugKind Level);
/// Indicates if this kernel may require thread-specific states, or if it was
/// explicitly disabled by the user.
-bool mayUseThreadStates();
+OMP_ATTRS bool mayUseThreadStates();
/// Indicates if this kernel may require data environments for nested
/// parallelism, or if it was explicitly disabled by the user.
-bool mayUseNestedParallelism();
+OMP_ATTRS bool mayUseNestedParallelism();
} // namespace config
} // namespace ompx
diff --git a/offload/DeviceRTL/include/Debug.h b/offload/DeviceRTL/include/Debug.h
index 22998f44a5bea5..31b465fe425b39 100644
--- a/offload/DeviceRTL/include/Debug.h
+++ b/offload/DeviceRTL/include/Debug.h
@@ -19,11 +19,12 @@
///
/// {
extern "C" {
-void __assert_assume(bool condition);
-void __assert_fail(const char *expr, const char *file, unsigned line,
- const char *function);
-void __assert_fail_internal(const char *expr, const char *msg, const char *file,
- unsigned line, const char *function);
+OMP_ATTRS void __assert_assume(bool condition);
+OMP_ATTRS void __assert_fail(const char *expr, const char *file, unsigned line,
+ const char *function);
+OMP_ATTRS void __assert_fail_internal(const char *expr, const char *msg,
+ const char *file, unsigned line,
+ const char *function);
}
#define ASSERT(expr, msg) \
diff --git a/offload/DeviceRTL/include/DeviceTypes.h b/offload/DeviceRTL/include/DeviceTypes.h
index 259bc008f91d13..404c2d7ca8d5ef 100644
--- a/offload/DeviceRTL/include/DeviceTypes.h
+++ b/offload/DeviceRTL/include/DeviceTypes.h
@@ -20,9 +20,9 @@
// another function but only inline assembly that performs some operation or
// side-effect and then continues execution with something on the existing call
// stack.
-//
-// TODO: Find a good place for this
-#pragma omp assumes ext_no_call_asm
+#pragma omp begin declare variant match(device = {kind(gpu)})
+#define OMP_ATTRS [[omp::assume("ext_no_call_asm")]]
+#pragma omp end declare variant
enum omp_proc_bind_t {
omp_proc_bind_false = 0,
diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h
index fa66b973a4f5e7..fddd0c8722f3f2 100644
--- a/offload/DeviceRTL/include/DeviceUtils.h
+++ b/offload/DeviceRTL/include/DeviceUtils.h
@@ -60,32 +60,35 @@ struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
template <class T>
using remove_addrspace_t = typename remove_addrspace<T>::type;
-template <typename To, typename From> inline To bitCast(From V) {
+template <typename To, typename From> OMP_ATTRS inline To bitCast(From V) {
static_assert(sizeof(To) == sizeof(From), "Bad conversion");
return __builtin_bit_cast(To, V);
}
/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
/// is identified by \p Mask.
-int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
+OMP_ATTRS int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
+ int32_t Width);
-int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
+OMP_ATTRS int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
+ int32_t Width);
-int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
+OMP_ATTRS int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
+ int32_t Width);
-uint64_t ballotSync(uint64_t Mask, int32_t Pred);
+OMP_ATTRS uint64_t ballotSync(uint64_t Mask, int32_t Pred);
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
-uint64_t pack(uint32_t LowBits, uint32_t HighBits);
+OMP_ATTRS uint64_t pack(uint32_t LowBits, uint32_t HighBits);
/// Unpack \p Val into \p LowBits and \p HighBits.
-void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
+OMP_ATTRS void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
-bool isSharedMemPtr(void *Ptr);
+OMP_ATTRS bool isSharedMemPtr(void *Ptr);
/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
-bool isThreadLocalMemPtr(void *Ptr);
+OMP_ATTRS bool isThreadLocalMemPtr(void *Ptr);
/// A pointer variable that has by design an `undef` value. Use with care.
[[clang::loader_uninitialized]] static void *const UndefPtr;
diff --git a/offload/DeviceRTL/include/Interface.h b/offload/DeviceRTL/include/Interface.h
index c4bfaaa2404b4f..cb0bfed8edc9df 100644
--- a/offload/DeviceRTL/include/Interface.h
+++ b/offload/DeviceRTL/include/Interface.h
@@ -28,8 +28,8 @@ extern "C" {
/// getter: returns 0.
///
///{
-void omp_set_dynamic(int);
-int omp_get_dynamic(void);
+OMP_ATTRS void omp_set_dynamic(int);
+OMP_ATTRS int omp_get_dynamic(void);
///}
/// ICV: nthreads-var, integer
@@ -43,8 +43,8 @@ int omp_get_dynamic(void);
///
///
///{
-void omp_set_num_threads(int);
-int omp_get_max_threads(void);
+OMP_ATTRS void omp_set_num_threads(int);
+OMP_ATTRS int omp_get_max_threads(void);
///}
/// ICV: thread-limit-var, computed
@@ -52,7 +52,7 @@ int omp_get_max_threads(void);
/// getter: returns thread limited defined during launch.
///
///{
-int omp_get_thread_limit(void);
+OMP_ATTRS int omp_get_thread_limit(void);
///}
/// ICV: max-active-level-var, constant 1
@@ -61,8 +61,8 @@ int omp_get_thread_limit(void);
/// getter: returns 1.
///
///{
-void omp_set_max_active_levels(int);
-int omp_get_max_active_levels(void);
+OMP_ATTRS void omp_set_max_active_levels(int);
+OMP_ATTRS int omp_get_max_active_levels(void);
///}
/// ICV: places-partition-var
@@ -76,7 +76,7 @@ int omp_get_max_active_levels(void);
/// getter: returns 0 or 1.
///
///{
-int omp_get_active_level(void);
+OMP_ATTRS int omp_get_active_level(void);
///}
/// ICV: level-var
@@ -84,88 +84,88 @@ int omp_get_active_level(void);
/// getter: returns parallel region nesting
///
///{
-int omp_get_level(void);
+OMP_ATTRS int omp_get_level(void);
///}
/// ICV: run-sched-var
///
///
///{
-void omp_set_schedule(omp_sched_t, int);
-void omp_get_schedule(omp_sched_t *, int *);
+OMP_ATTRS void omp_set_schedule(omp_sched_t, int);
+OMP_ATTRS void omp_get_schedule(omp_sched_t *, int *);
///}
/// TODO this is incomplete.
-int omp_get_num_threads(void);
-int omp_get_thread_num(void);
-void omp_set_nested(int);
+OMP_ATTRS int omp_get_num_threads(void);
+OMP_ATTRS int omp_get_thread_num(void);
+OMP_ATTRS void omp_set_nested(int);
-int omp_get_nested(void);
+OMP_ATTRS int omp_get_nested(void);
-void omp_set_max_active_levels(int Level);
+OMP_ATTRS void omp_set_max_active_levels(int Level);
-int omp_get_max_active_levels(void);
+OMP_ATTRS int omp_get_max_active_levels(void);
-omp_proc_bind_t omp_get_proc_bind(void);
+OMP_ATTRS omp_proc_bind_t omp_get_proc_bind(void);
-int omp_get_num_places(void);
+OMP_ATTRS int omp_get_num_places(void);
-int omp_get_place_num_procs(int place_num);
+OMP_ATTRS int omp_get_place_num_procs(int place_num);
-void omp_get_place_proc_ids(int place_num, int *ids);
+OMP_ATTRS void omp_get_place_proc_ids(int place_num, int *ids);
-int omp_get_place_num(void);
+OMP_ATTRS int omp_get_place_num(void);
-int omp_get_partition_num_places(void);
+OMP_ATTRS int omp_get_partition_num_places(void);
-void omp_get_partition_place_nums(int *place_nums);
+OMP_ATTRS void omp_get_partition_place_nums(int *place_nums);
-int omp_get_cancellation(void);
+OMP_ATTRS int omp_get_cancellation(void);
-void omp_set_default_device(int deviceId);
+OMP_ATTRS void omp_set_default_device(int deviceId);
-int omp_get_default_device(void);
+OMP_ATTRS int omp_get_default_device(void);
-int omp_get_num_devices(void);
+OMP_ATTRS int omp_get_num_devices(void);
-int omp_get_device_num(void);
+OMP_ATTRS int omp_get_device_num(void);
-int omp_get_num_teams(void);
+OMP_ATTRS int omp_get_num_teams(void);
-int omp_get_team_num();
+OMP_ATTRS int omp_get_team_num();
-int omp_get_initial_device(void);
+OMP_ATTRS int omp_get_initial_device(void);
-void *llvm_omp_target_dynamic_shared_alloc();
+OMP_ATTRS void *llvm_omp_target_dynamic_shared_alloc();
/// Synchronization
///
///{
-void omp_init_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_init_lock(omp_lock_t *Lock);
-void omp_destroy_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_destroy_lock(omp_lock_t *Lock);
-void omp_set_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_set_lock(omp_lock_t *Lock);
-void omp_unset_lock(omp_lock_t *Lock);
+OMP_ATTRS void omp_unset_lock(omp_lock_t *Lock);
-int omp_test_lock(omp_lock_t *Lock);
+OMP_ATTRS int omp_test_lock(omp_lock_t *Lock);
///}
/// Tasking
///
///{
-int omp_in_final(void);
+OMP_ATTRS int omp_in_final(void);
-int omp_get_max_task_priority(void);
+OMP_ATTRS int omp_get_max_task_priority(void);
///}
/// Misc
///
///{
-double omp_get_wtick(void);
+OMP_ATTRS double omp_get_wtick(void);
-double omp_get_wtime(void);
+OMP_ATTRS double omp_get_wtime(void);
///}
}
@@ -173,16 +173,16 @@ extern "C" {
/// Allocate \p Bytes in "shareable" memory and return the address. Needs to be
/// called balanced with __kmpc_free_shared like a stack (push/pop). Can be
/// called by any thread, allocation happens *per thread*.
-void *__kmpc_alloc_shared(uint64_t Bytes);
+OMP_ATTRS void *__kmpc_alloc_shared(uint64_t Bytes);
/// Deallocate \p Ptr. Needs to be called balanced with __kmpc_alloc_shared like
/// a stack (push/pop). Can be called by any thread. \p Ptr has to be the
/// allocated by __kmpc_alloc_shared by the same thread.
-void __kmpc_free_shared(void *Ptr, uint64_t Bytes);
+OMP_ATTRS void __kmpc_free_shared(void *Ptr, uint64_t Bytes);
/// Get a pointer to the memory buffer containing dynamically allocated shared
/// memory configured at launch.
-void *__kmpc_get_dynamic_shared();
+OMP_ATTRS void *__kmpc_get_dynamic_shared();
/// Allocate sufficient space for \p NumArgs sequential `void*` and store the
/// allocation address in \p GlobalArgs.
@@ -191,27 +191,28 @@ void *__kmpc_get_dynamic_shared();
///
/// We also remember it in GlobalArgsPtr to ensure the worker threads and
/// deallocation function know the allocation address too.
-void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t NumArgs);
+OMP_ATTRS void __kmpc_begin_sharing_variables(void ***GlobalArgs,
+ uint64_t NumArgs);
/// Deallocate the memory allocated by __kmpc_begin_sharing_variables.
///
/// Called by the main thread after a parallel region.
-void __kmpc_end_sharing_variables();
+OMP_ATTRS void __kmpc_end_sharing_variables();
/// Store the allocation address obtained via __kmpc_begin_sharing_variables in
/// \p GlobalArgs.
///
/// Called by the worker threads in the parallel region (function).
-void __kmpc_get_shared_variables(void ***GlobalArgs);
+OMP_ATTRS void __kmpc_get_shared_variables(void ***GlobalArgs);
/// External interface to get the thread ID.
-uint32_t __kmpc_get_hardware_thread_id_in_block();
+OMP_ATTRS uint32_t __kmpc_get_hardware_thread_id_in_block();
/// External interface to get the number of threads.
-uint32_t __kmpc_get_hardware_num_threads_in_block();
+OMP_ATTRS uint32_t __kmpc_get_hardware_num_threads_in_block();
/// External interface to get the warp size.
-uint32_t __kmpc_get_warp_size();
+OMP_ATTRS uint32_t __kmpc_get_warp_size();
/// Kernel
///
@@ -219,27 +220,26 @@ uint32_t __kmpc_get_warp_size();
// Forward declaration
struct KernelEnvironmentTy;
-int8_t __kmpc_is_spmd_exec_mode();
+OMP_ATTRS int8_t __kmpc_is_spmd_exec_mode();
-int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
- KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
+OMP_ATTRS int32_t
+__kmpc_target_init(KernelEnvironmentTy &KernelEnvironment,
+ KernelLaunchEnvironmentTy &KernelLaunchEnvironment);
-void __kmpc_target_deinit();
+OMP_ATTRS void __kmpc_target_deinit();
///}
/// Reduction
///
///{
-void *__kmpc_reduction_get_fixed_buffer();
+OMP_ATTRS void *__kmpc_reduction_get_fixed_buffer();
-int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc,
- uint64_t reduce_data_size,
- void *reduce_data,
- ShuffleReductFnTy shflFct,
- InterWarpCopyFnTy cpyFct);
+OMP_ATTRS int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
+ IdentTy *Loc, uint64_t reduce_data_size, void *reduce_data,
+ ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct);
-int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
+OMP_ATTRS int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records,
uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct,
InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct,
@@ -249,116 +249,120 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
/// Synchronization
///
///{
-void __kmpc_ordered(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_ordered(IdentTy *Loc, int32_t TId);
-void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_ordered(IdentTy *Loc, int32_t TId);
-int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS int32_t __kmpc_cancel_barrier(IdentTy *Loc_ref, int32_t TId);
-void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier(IdentTy *Loc_ref, int32_t TId);
-void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier_simple_spmd(IdentTy *Loc_ref, int32_t TId);
-void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
+OMP_ATTRS void __kmpc_barrier_simple_generic(IdentTy *Loc_ref, int32_t TId);
-int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
+OMP_ATTRS int32_t __kmpc_master(IdentTy *Loc, int32_t TId);
-void __kmpc_end_master(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_master(IdentTy *Loc, int32_t TId);
-int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
+OMP_ATTRS int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter);
-void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_masked(IdentTy *Loc, int32_t TId);
-int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
+OMP_ATTRS int32_t __kmpc_single(IdentTy *Loc, int32_t TId);
-void __kmpc_end_single(IdentTy *Loc, int32_t TId);
+OMP_ATTRS void __kmpc_end_single(IdentTy *Loc, int32_t TId);
-void __kmpc_flush(IdentTy *Loc);
+OMP_ATTRS void __kmpc_flush(IdentTy *Loc);
-uint64_t __kmpc_warp_active_thread_mask(void);
+OMP_ATTRS uint64_t __kmpc_warp_active_thread_mask(void);
-void __kmpc_syncwarp(uint64_t Mask);
+OMP_ATTRS void __kmpc_syncwarp(uint64_t Mask);
-void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
+OMP_ATTRS void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
-void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name);
+OMP_ATTRS void __kmpc_end_critical(IdentTy *Loc, int32_t TId,
+ CriticalNameTy *Name);
///}
/// Parallelism
///
///{
/// TODO
-void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);
+OMP_ATTRS void __kmpc_kernel_prepare_parallel(ParallelRegionFnTy WorkFn);
/// TODO
-bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);
+OMP_ATTRS bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn);
/// TODO
-void __kmpc_kernel_end_parallel();
+OMP_ATTRS void __kmpc_kernel_end_parallel();
/// TODO
-void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);
+OMP_ATTRS void __kmpc_push_proc_bind(IdentTy *Loc, uint32_t TId, int ProcBind);
/// TODO
-void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId, int32_t NumTeams,
- int32_t ThreadLimit);
+OMP_ATTRS void __kmpc_push_num_teams(IdentTy *Loc, int32_t TId,
+ int32_t NumTeams, int32_t ThreadLimit);
/// TODO
-uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);
+OMP_ATTRS uint16_t __kmpc_parallel_level(IdentTy *Loc, uint32_t);
///}
/// Tasking
///
///{
-TaskDescriptorTy *__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
- size_t TaskSizeInclPrivateValues,
- size_t SharedValuesSize,
- TaskFnTy TaskFn);
+OMP_ATTRS TaskDescriptorTy *
+__kmpc_omp_task_alloc(IdentTy *, int32_t, int32_t,
+ size_t...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/122399
More information about the llvm-commits
mailing list