[Openmp-commits] [openmp] Attributes (PR #69358)
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Tue Oct 17 10:17:48 PDT 2023
https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/69358
- [Libomptarget] Make the references to 'malloc' and 'free' weak.
- [Libomptarget][NFC] Use C++ style attributes instead
>From 9c44ea8ab569e5995261ea77b62ae1fc6d94370a Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6 at vols.utk.edu>
Date: Tue, 17 Oct 2023 11:45:19 -0500
Subject: [PATCH 1/2] [Libomptarget] Make the references to 'malloc' and 'free'
weak.
Summary:
We use `malloc` internally in the DeviceRTL to handle data
globalization. If `malloc` is left undefined, it maps to the Nvidia
implementation of `malloc` on NVPTX and returns `nullptr` on AMDGPU.
This is somewhat problematic, because when using this as a shared
library the undefined reference causes us to always extract the GPU
`libc` implementation, which uses RPC and thus requires an RPC server.
Making these references `weak` allows us to implement them internally
without worrying about binding to the GPU `libc` implementation.
---
openmp/libomptarget/DeviceRTL/src/State.cpp | 4 ++--
openmp/libomptarget/DeviceRTL/src/exports | 2 ++
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 721137cb95d658b..422747a94e7943a 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -46,8 +46,8 @@ namespace {
///{
extern "C" {
-__attribute__((leaf)) void *malloc(uint64_t Size);
-__attribute__((leaf)) void free(void *Ptr);
+[[gnu::weak, gnu::leaf]] void *malloc(uint64_t Size);
+[[gnu::weak, gnu::leaf]] void free(void *Ptr);
}
///}
diff --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports
index fbcda3ce8f555ca..288ddf90b4a9f2d 100644
--- a/openmp/libomptarget/DeviceRTL/src/exports
+++ b/openmp/libomptarget/DeviceRTL/src/exports
@@ -11,6 +11,8 @@ _ZN4ompx*
IsSPMDMode
+malloc
+free
memcmp
printf
__assert_fail
>From af6165564e6da26bf997a3a9b270cfdda54dbcd4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6 at vols.utk.edu>
Date: Tue, 17 Oct 2023 12:16:18 -0500
Subject: [PATCH 2/2] [Libomptarget][NFC] Use C++ style attributes instead
Summary:
This patch changes no functionality and simply switches to using
C++-style attributes. These tend to be shorter and have better IDE /
completion support as they are part of the actual language.
---
openmp/libomptarget/DeviceRTL/include/State.h | 26 +++++++++----------
.../DeviceRTL/include/Synchronization.h | 2 +-
openmp/libomptarget/DeviceRTL/include/Utils.h | 2 +-
.../DeviceRTL/src/Configuration.cpp | 5 ++--
openmp/libomptarget/DeviceRTL/src/Mapping.cpp | 8 +++---
.../DeviceRTL/src/Parallelism.cpp | 14 +++++-----
openmp/libomptarget/DeviceRTL/src/State.cpp | 12 ++++-----
.../DeviceRTL/src/Synchronization.cpp | 7 +++--
openmp/libomptarget/DeviceRTL/src/Utils.cpp | 2 +-
9 files changed, 36 insertions(+), 42 deletions(-)
diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index 60dc439f9551c21..5db5e27ebe8886b 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -176,7 +176,7 @@ inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
return TeamState.ICVState.*Var;
}
-__attribute__((always_inline, flatten)) inline uint32_t &
+[[gnu::always_inline, gnu::flatten]] inline uint32_t &
lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
switch (Kind) {
case state::VK_NThreads:
@@ -218,7 +218,7 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
__builtin_unreachable();
}
-__attribute__((always_inline, flatten)) inline void *&
+[[gnu::always_inline, gnu::flatten]] inline void *&
lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
switch (Kind) {
case state::VK_ParallelRegionFn:
@@ -232,47 +232,45 @@ lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
/// A class without actual state used to provide a nice interface to lookup and
/// update ICV values we can declare in global scope.
template <typename Ty, ValueKind Kind> struct Value {
- __attribute__((flatten, always_inline)) operator Ty() {
+ [[gnu::flatten, gnu::always_inline]] operator Ty() {
return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
/* ForceTeamState */ false);
}
- __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
+ [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) {
set(Other, /* IdentTy */ nullptr);
return *this;
}
- __attribute__((flatten, always_inline)) Value &operator++() {
+ [[gnu::flatten, gnu::always_inline]] Value &operator++() {
inc(1, /* IdentTy */ nullptr);
return *this;
}
- __attribute__((flatten, always_inline)) Value &operator--() {
+ [[gnu::flatten, gnu::always_inline]] Value &operator--() {
inc(-1, /* IdentTy */ nullptr);
return *this;
}
- __attribute__((flatten, always_inline)) void
+ [[gnu::flatten, gnu::always_inline]] void
assert_eq(const Ty &V, IdentTy *Ident = nullptr,
bool ForceTeamState = false) {
ASSERT(lookup(/* IsReadonly */ true, Ident, ForceTeamState) == V, nullptr);
}
private:
- __attribute__((flatten, always_inline)) Ty &
+ [[gnu::flatten, gnu::always_inline]] Ty &
lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
return t;
}
- __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
- IdentTy *Ident) {
+ [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) {
return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) +=
UpdateVal);
}
- __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
- IdentTy *Ident) {
+ [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) {
return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) =
UpdateVal);
}
@@ -284,12 +282,12 @@ template <typename Ty, ValueKind Kind> struct Value {
/// a nice interface to lookup and update ICV values
/// we can declare in global scope.
template <typename Ty, ValueKind Kind> struct PtrValue {
- __attribute__((flatten, always_inline)) operator Ty() {
+ [[gnu::flatten, gnu::always_inline]] operator Ty() {
return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
/* ForceTeamState */ false);
}
- __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
+ [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) {
set(Other);
return *this;
}
diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
index b31238fbbc9c749..af9e1a673e6a236 100644
--- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h
+++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
@@ -115,7 +115,7 @@ void threads(atomic::OrderingTy Ordering);
/// (hence all threads in the block are "aligned"). Also perform a fence before
/// and after the barrier according to \p Ordering. Note that the
/// fence might be part of the barrier if the target offers this.
-__attribute__((noinline)) void threadsAligned(atomic::OrderingTy Ordering);
+[[gnu::noinline]] void threadsAligned(atomic::OrderingTy Ordering);
#pragma omp end assumes
///}
diff --git a/openmp/libomptarget/DeviceRTL/include/Utils.h b/openmp/libomptarget/DeviceRTL/include/Utils.h
index 94da763717e22fe..4ab0aea46eea122 100644
--- a/openmp/libomptarget/DeviceRTL/include/Utils.h
+++ b/openmp/libomptarget/DeviceRTL/include/Utils.h
@@ -83,7 +83,7 @@ template <typename DstTy, typename SrcTy> inline DstTy convertViaPun(SrcTy V) {
}
/// A pointer variable that has by design an `undef` value. Use with care.
-__attribute__((loader_uninitialized)) static void *const UndefPtr;
+[[clang::loader_uninitialized]] static void *const UndefPtr;
#define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
#define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index 809c5f03886b048..a792e5be568e6ee 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -27,8 +27,9 @@ using namespace ompx;
// This variable should be visibile to the plugin so we override the default
// hidden visibility.
-DeviceEnvironmentTy CONSTANT(__omp_rtl_device_environment)
- __attribute__((used, retain, weak, visibility("protected")));
+[[gnu::used, gnu::retain, gnu::weak,
+ gnu::visibility("protected")]] DeviceEnvironmentTy
+ CONSTANT(__omp_rtl_device_environment);
uint32_t config::getDebugKind() {
return __omp_rtl_debug_kind & __omp_rtl_device_environment.DebugKind;
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index c75a694fce35b6d..822b8dc2dd5e671 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -345,7 +345,7 @@ uint32_t mapping::getNumberOfProcessorElements() {
// TODO: This is a workaround for initialization coming from kernels outside of
// the TU. We will need to solve this more correctly in the future.
-int __attribute__((weak)) SHARED(IsSPMDMode);
+[[gnu::weak]] int SHARED(IsSPMDMode);
void mapping::init(bool IsSPMD) {
if (mapping::isInitialThreadInLevel0(IsSPMD))
@@ -358,15 +358,15 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }
///}
extern "C" {
-__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
+[[gnu::noinline]] uint32_t __kmpc_get_hardware_thread_id_in_block() {
return mapping::getThreadIdInBlock();
}
-__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
+[[gnu::noinline]] uint32_t __kmpc_get_hardware_num_threads_in_block() {
return impl::getNumberOfThreadsInBlock(mapping::DIM_X);
}
-__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+[[gnu::noinline]] uint32_t __kmpc_get_warp_size() {
return impl::getWarpSize();
}
}
diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index 1610b74fc78bc97..2c0701bd5358fd9 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -68,10 +68,9 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
}
// Invoke an outlined parallel function unwrapping arguments (up to 32).
-__attribute__((always_inline)) void invokeMicrotask(int32_t global_tid,
- int32_t bound_tid, void *fn,
- void **args,
- int64_t nargs) {
+[[clang::always_inline]] void invokeMicrotask(int32_t global_tid,
+ int32_t bound_tid, void *fn,
+ void **args, int64_t nargs) {
switch (nargs) {
#include "generated_microtask_cases.gen"
default:
@@ -84,7 +83,7 @@ __attribute__((always_inline)) void invokeMicrotask(int32_t global_tid,
extern "C" {
-__attribute__((always_inline)) void
+[[clang::always_inline]] void
__kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
int32_t num_threads, int proc_bind, void *fn,
void *wrapper_fn, void **args, int64_t nargs) {
@@ -262,8 +261,7 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
__kmpc_end_sharing_variables();
}
-__attribute__((noinline)) bool
-__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
+[[clang::noinline]] bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
// Work function and arguments for L1 parallel region.
*WorkFn = state::ParallelRegionFn;
@@ -277,7 +275,7 @@ __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
return ThreadIsActive;
}
-__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
+[[clang::noinline]] void __kmpc_kernel_end_parallel() {
// In case we have modified an ICV for this thread before a ThreadState was
// created. We drop it now to not contaminate the next parallel region.
ASSERT(!mapping::isSPMDMode(), nullptr);
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 422747a94e7943a..c34adfb94d7c731 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -31,7 +31,7 @@ using namespace ompx;
constexpr const uint32_t Alignment = 16;
/// External symbol to access dynamic shared memory.
-extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
+[[gnu::aligned(Alignment)]] extern unsigned char DynamicSharedBuffer[];
#pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
/// The kernel environment passed to the init method by the compiler.
@@ -105,10 +105,8 @@ struct SharedMemorySmartStackTy {
}
/// The actual storage, shared among all warps.
- unsigned char Data[state::SharedScratchpadSize]
- __attribute__((aligned(Alignment)));
- unsigned char Usage[mapping::MaxThreadsPerTeam]
- __attribute__((aligned(Alignment)));
+ [[gnu::aligned(Alignment)]] unsigned char Data[state::SharedScratchpadSize];
+ [[gnu::aligned(Alignment)]] unsigned char Usage[mapping::MaxThreadsPerTeam];
};
static_assert(state::SharedScratchpadSize / mapping::MaxThreadsPerTeam <= 256,
@@ -423,11 +421,11 @@ int omp_get_initial_device(void) { return -1; }
}
extern "C" {
-__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
+[[clang::noinline]] void *__kmpc_alloc_shared(uint64_t Bytes) {
return memory::allocShared(Bytes, "Frontend alloc shared");
}
-__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
+[[clang::noinline]] void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
memory::freeShared(Ptr, Bytes, "Frontend free shared");
}
diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 3370c5a8472f0b9..b9a192f0d84df9a 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -523,13 +523,12 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
impl::namedBarrier();
}
-__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
- int32_t TId) {
+[[clang::noinline]] void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) {
synchronize::threadsAligned(atomic::OrderingTy::seq_cst);
}
-__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
- int32_t TId) {
+[[clang::noinline]] void __kmpc_barrier_simple_generic(IdentTy *Loc,
+ int32_t TId) {
synchronize::threads(atomic::OrderingTy::seq_cst);
}
diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
index 6125236863098f5..b39465aaa2ace5f 100644
--- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -19,7 +19,7 @@
using namespace ompx;
-extern "C" __attribute__((weak)) int IsSPMDMode;
+extern "C" [[gnu::weak]] int IsSPMDMode;
namespace impl {
More information about the Openmp-commits
mailing list