[Openmp-commits] [openmp] Attributes (PR #69358)

Joseph Huber via Openmp-commits openmp-commits at lists.llvm.org
Tue Oct 17 10:17:48 PDT 2023


https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/69358

- [Libomptarget] Make the references to 'malloc' and 'free' weak.
- [Libomptarget][NFC] Use C++ style attributes instead


>From 9c44ea8ab569e5995261ea77b62ae1fc6d94370a Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6 at vols.utk.edu>
Date: Tue, 17 Oct 2023 11:45:19 -0500
Subject: [PATCH 1/2] [Libomptarget] Make the references to 'malloc' and 'free'
 weak.

Summary:
We use `malloc` internally in the DeviceRTL to handle data
globalization. If this is undefined it will map to the Nvidia
implementation of `malloc` for NVPTX and return `nullptr` for AMDGPU.
This is somewhat problematic, because when using this as a shared
library it causes us to always extract the GPU libc implementation,
which uses RPC and thus requires an RPC server. Making this `weak`
allows us to implement this internally without worrying about binding to
the GPU `libc` implementation.
---
 openmp/libomptarget/DeviceRTL/src/State.cpp | 4 ++--
 openmp/libomptarget/DeviceRTL/src/exports   | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 721137cb95d658b..422747a94e7943a 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -46,8 +46,8 @@ namespace {
 ///{
 
 extern "C" {
-__attribute__((leaf)) void *malloc(uint64_t Size);
-__attribute__((leaf)) void free(void *Ptr);
+[[gnu::weak, gnu::leaf]] void *malloc(uint64_t Size);
+[[gnu::weak, gnu::leaf]] void free(void *Ptr);
 }
 
 ///}
diff --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports
index fbcda3ce8f555ca..288ddf90b4a9f2d 100644
--- a/openmp/libomptarget/DeviceRTL/src/exports
+++ b/openmp/libomptarget/DeviceRTL/src/exports
@@ -11,6 +11,8 @@ _ZN4ompx*
 
 IsSPMDMode
 
+malloc
+free
 memcmp
 printf
 __assert_fail

>From af6165564e6da26bf997a3a9b270cfdda54dbcd4 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6 at vols.utk.edu>
Date: Tue, 17 Oct 2023 12:16:18 -0500
Subject: [PATCH 2/2] [Libomptarget][NFC] Use C++ style attributes instead

Summary:
This patch changes no functionality and simply switches to using the C++
styled attributes. These tend to be shorter and have better IDE /
completion support as they are in the actual language.
---
 openmp/libomptarget/DeviceRTL/include/State.h | 26 +++++++++----------
 .../DeviceRTL/include/Synchronization.h       |  2 +-
 openmp/libomptarget/DeviceRTL/include/Utils.h |  2 +-
 .../DeviceRTL/src/Configuration.cpp           |  5 ++--
 openmp/libomptarget/DeviceRTL/src/Mapping.cpp |  8 +++---
 .../DeviceRTL/src/Parallelism.cpp             | 14 +++++-----
 openmp/libomptarget/DeviceRTL/src/State.cpp   | 12 ++++-----
 .../DeviceRTL/src/Synchronization.cpp         |  7 +++--
 openmp/libomptarget/DeviceRTL/src/Utils.cpp   |  2 +-
 9 files changed, 36 insertions(+), 42 deletions(-)

diff --git a/openmp/libomptarget/DeviceRTL/include/State.h b/openmp/libomptarget/DeviceRTL/include/State.h
index 60dc439f9551c21..5db5e27ebe8886b 100644
--- a/openmp/libomptarget/DeviceRTL/include/State.h
+++ b/openmp/libomptarget/DeviceRTL/include/State.h
@@ -176,7 +176,7 @@ inline uint32_t &lookupImpl(uint32_t state::ICVStateTy::*Var,
   return TeamState.ICVState.*Var;
 }
 
-__attribute__((always_inline, flatten)) inline uint32_t &
+[[gnu::always_inline, gnu::flatten]] inline uint32_t &
 lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
   switch (Kind) {
   case state::VK_NThreads:
@@ -218,7 +218,7 @@ lookup32(ValueKind Kind, bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
   __builtin_unreachable();
 }
 
-__attribute__((always_inline, flatten)) inline void *&
+[[gnu::always_inline, gnu::flatten]] inline void *&
 lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
   switch (Kind) {
   case state::VK_ParallelRegionFn:
@@ -232,47 +232,45 @@ lookupPtr(ValueKind Kind, bool IsReadonly, bool ForceTeamState) {
 /// A class without actual state used to provide a nice interface to lookup and
 /// update ICV values we can declare in global scope.
 template <typename Ty, ValueKind Kind> struct Value {
-  __attribute__((flatten, always_inline)) operator Ty() {
+  [[gnu::flatten, gnu::always_inline]] operator Ty() {
     return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
                   /* ForceTeamState */ false);
   }
 
-  __attribute__((flatten, always_inline)) Value &operator=(const Ty &Other) {
+  [[gnu::flatten, gnu::always_inline]] Value &operator=(const Ty &Other) {
     set(Other, /* IdentTy */ nullptr);
     return *this;
   }
 
-  __attribute__((flatten, always_inline)) Value &operator++() {
+  [[gnu::flatten, gnu::always_inline]] Value &operator++() {
     inc(1, /* IdentTy */ nullptr);
     return *this;
   }
 
-  __attribute__((flatten, always_inline)) Value &operator--() {
+  [[gnu::flatten, gnu::always_inline]] Value &operator--() {
     inc(-1, /* IdentTy */ nullptr);
     return *this;
   }
 
-  __attribute__((flatten, always_inline)) void
+  [[gnu::flatten, gnu::always_inline]] void
   assert_eq(const Ty &V, IdentTy *Ident = nullptr,
             bool ForceTeamState = false) {
     ASSERT(lookup(/* IsReadonly */ true, Ident, ForceTeamState) == V, nullptr);
   }
 
 private:
-  __attribute__((flatten, always_inline)) Ty &
+  [[gnu::flatten, gnu::always_inline]] Ty &
   lookup(bool IsReadonly, IdentTy *Ident, bool ForceTeamState) {
     Ty &t = lookup32(Kind, IsReadonly, Ident, ForceTeamState);
     return t;
   }
 
-  __attribute__((flatten, always_inline)) Ty &inc(int UpdateVal,
-                                                  IdentTy *Ident) {
+  [[gnu::flatten, gnu::always_inline]] Ty &inc(int UpdateVal, IdentTy *Ident) {
     return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) +=
             UpdateVal);
   }
 
-  __attribute__((flatten, always_inline)) Ty &set(Ty UpdateVal,
-                                                  IdentTy *Ident) {
+  [[gnu::flatten, gnu::always_inline]] Ty &set(Ty UpdateVal, IdentTy *Ident) {
     return (lookup(/* IsReadonly */ false, Ident, /* ForceTeamState */ false) =
                 UpdateVal);
   }
@@ -284,12 +282,12 @@ template <typename Ty, ValueKind Kind> struct Value {
 /// a nice interface to lookup and update ICV values
 /// we can declare in global scope.
 template <typename Ty, ValueKind Kind> struct PtrValue {
-  __attribute__((flatten, always_inline)) operator Ty() {
+  [[gnu::flatten, gnu::always_inline]] operator Ty() {
     return lookup(/* IsReadonly */ true, /* IdentTy */ nullptr,
                   /* ForceTeamState */ false);
   }
 
-  __attribute__((flatten, always_inline)) PtrValue &operator=(const Ty Other) {
+  [[gnu::flatten, gnu::always_inline]] PtrValue &operator=(const Ty Other) {
     set(Other);
     return *this;
   }
diff --git a/openmp/libomptarget/DeviceRTL/include/Synchronization.h b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
index b31238fbbc9c749..af9e1a673e6a236 100644
--- a/openmp/libomptarget/DeviceRTL/include/Synchronization.h
+++ b/openmp/libomptarget/DeviceRTL/include/Synchronization.h
@@ -115,7 +115,7 @@ void threads(atomic::OrderingTy Ordering);
 /// (hence all threads in the block are "aligned"). Also perform a fence before
 /// and after the barrier according to \p Ordering. Note that the
 /// fence might be part of the barrier if the target offers this.
-__attribute__((noinline)) void threadsAligned(atomic::OrderingTy Ordering);
+[[gnu::noinline]] void threadsAligned(atomic::OrderingTy Ordering);
 
 #pragma omp end assumes
 ///}
diff --git a/openmp/libomptarget/DeviceRTL/include/Utils.h b/openmp/libomptarget/DeviceRTL/include/Utils.h
index 94da763717e22fe..4ab0aea46eea122 100644
--- a/openmp/libomptarget/DeviceRTL/include/Utils.h
+++ b/openmp/libomptarget/DeviceRTL/include/Utils.h
@@ -83,7 +83,7 @@ template <typename DstTy, typename SrcTy> inline DstTy convertViaPun(SrcTy V) {
 }
 
 /// A  pointer variable that has by design an `undef` value. Use with care.
-__attribute__((loader_uninitialized)) static void *const UndefPtr;
+[[clang::loader_uninitialized]] static void *const UndefPtr;
 
 #define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
 #define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index 809c5f03886b048..a792e5be568e6ee 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -27,8 +27,9 @@ using namespace ompx;
 
 // This variable should be visibile to the plugin so we override the default
 // hidden visibility.
-DeviceEnvironmentTy CONSTANT(__omp_rtl_device_environment)
-    __attribute__((used, retain, weak, visibility("protected")));
+[[gnu::used, gnu::retain, gnu::weak,
+  gnu::visibility("protected")]] DeviceEnvironmentTy
+    CONSTANT(__omp_rtl_device_environment);
 
 uint32_t config::getDebugKind() {
   return __omp_rtl_debug_kind & __omp_rtl_device_environment.DebugKind;
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index c75a694fce35b6d..822b8dc2dd5e671 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -345,7 +345,7 @@ uint32_t mapping::getNumberOfProcessorElements() {
 
 // TODO: This is a workaround for initialization coming from kernels outside of
 //       the TU. We will need to solve this more correctly in the future.
-int __attribute__((weak)) SHARED(IsSPMDMode);
+[[gnu::weak]] int SHARED(IsSPMDMode);
 
 void mapping::init(bool IsSPMD) {
   if (mapping::isInitialThreadInLevel0(IsSPMD))
@@ -358,15 +358,15 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }
 ///}
 
 extern "C" {
-__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
+[[gnu::noinline]] uint32_t __kmpc_get_hardware_thread_id_in_block() {
   return mapping::getThreadIdInBlock();
 }
 
-__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
+[[gnu::noinline]] uint32_t __kmpc_get_hardware_num_threads_in_block() {
   return impl::getNumberOfThreadsInBlock(mapping::DIM_X);
 }
 
-__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
+[[gnu::noinline]] uint32_t __kmpc_get_warp_size() {
   return impl::getWarpSize();
 }
 }
diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
index 1610b74fc78bc97..2c0701bd5358fd9 100644
--- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
@@ -68,10 +68,9 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
 }
 
 // Invoke an outlined parallel function unwrapping arguments (up to 32).
-__attribute__((always_inline)) void invokeMicrotask(int32_t global_tid,
-                                                    int32_t bound_tid, void *fn,
-                                                    void **args,
-                                                    int64_t nargs) {
+[[clang::always_inline]] void invokeMicrotask(int32_t global_tid,
+                                              int32_t bound_tid, void *fn,
+                                              void **args, int64_t nargs) {
   switch (nargs) {
 #include "generated_microtask_cases.gen"
   default:
@@ -84,7 +83,7 @@ __attribute__((always_inline)) void invokeMicrotask(int32_t global_tid,
 
 extern "C" {
 
-__attribute__((always_inline)) void
+[[clang::always_inline]] void
 __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
                    int32_t num_threads, int proc_bind, void *fn,
                    void *wrapper_fn, void **args, int64_t nargs) {
@@ -262,8 +261,7 @@ __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
     __kmpc_end_sharing_variables();
 }
 
-__attribute__((noinline)) bool
-__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
+[[clang::noinline]] bool __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
   // Work function and arguments for L1 parallel region.
   *WorkFn = state::ParallelRegionFn;
 
@@ -277,7 +275,7 @@ __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
   return ThreadIsActive;
 }
 
-__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
+[[clang::noinline]] void __kmpc_kernel_end_parallel() {
   // In case we have modified an ICV for this thread before a ThreadState was
   // created. We drop it now to not contaminate the next parallel region.
   ASSERT(!mapping::isSPMDMode(), nullptr);
diff --git a/openmp/libomptarget/DeviceRTL/src/State.cpp b/openmp/libomptarget/DeviceRTL/src/State.cpp
index 422747a94e7943a..c34adfb94d7c731 100644
--- a/openmp/libomptarget/DeviceRTL/src/State.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/State.cpp
@@ -31,7 +31,7 @@ using namespace ompx;
 constexpr const uint32_t Alignment = 16;
 
 /// External symbol to access dynamic shared memory.
-extern unsigned char DynamicSharedBuffer[] __attribute__((aligned(Alignment)));
+[[gnu::aligned(Alignment)]] extern unsigned char DynamicSharedBuffer[];
 #pragma omp allocate(DynamicSharedBuffer) allocator(omp_pteam_mem_alloc)
 
 /// The kernel environment passed to the init method by the compiler.
@@ -105,10 +105,8 @@ struct SharedMemorySmartStackTy {
   }
 
   /// The actual storage, shared among all warps.
-  unsigned char Data[state::SharedScratchpadSize]
-      __attribute__((aligned(Alignment)));
-  unsigned char Usage[mapping::MaxThreadsPerTeam]
-      __attribute__((aligned(Alignment)));
+  [[gnu::aligned(Alignment)]] unsigned char Data[state::SharedScratchpadSize];
+  [[gnu::aligned(Alignment)]] unsigned char Usage[mapping::MaxThreadsPerTeam];
 };
 
 static_assert(state::SharedScratchpadSize / mapping::MaxThreadsPerTeam <= 256,
@@ -423,11 +421,11 @@ int omp_get_initial_device(void) { return -1; }
 }
 
 extern "C" {
-__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
+[[clang::noinline]] void *__kmpc_alloc_shared(uint64_t Bytes) {
   return memory::allocShared(Bytes, "Frontend alloc shared");
 }
 
-__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
+[[clang::noinline]] void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
   memory::freeShared(Ptr, Bytes, "Frontend free shared");
 }
 
diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index 3370c5a8472f0b9..b9a192f0d84df9a 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -523,13 +523,12 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
   impl::namedBarrier();
 }
 
-__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
-                                                          int32_t TId) {
+[[clang::noinline]] void __kmpc_barrier_simple_spmd(IdentTy *Loc, int32_t TId) {
   synchronize::threadsAligned(atomic::OrderingTy::seq_cst);
 }
 
-__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
-                                                             int32_t TId) {
+[[clang::noinline]] void __kmpc_barrier_simple_generic(IdentTy *Loc,
+                                                       int32_t TId) {
   synchronize::threads(atomic::OrderingTy::seq_cst);
 }
 
diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
index 6125236863098f5..b39465aaa2ace5f 100644
--- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -19,7 +19,7 @@
 
 using namespace ompx;
 
-extern "C" __attribute__((weak)) int IsSPMDMode;
+extern "C" [[gnu::weak]] int IsSPMDMode;
 
 namespace impl {
 



More information about the Openmp-commits mailing list