[flang-commits] [flang] [flang][cuda] Add RT_API_ATTRS to allocator functions (PR #102235)

Tue Aug 6 14:40:22 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-flang-runtime

Author: Valentin Clement (バレンタイン クレメン) (clementval)

<details>
<summary>Changes</summary>

Allocators might be called from device code. Add RT_API_ATTRS so they can be built for the device.

---
Full diff: https://github.com/llvm/llvm-project/pull/102235.diff


2 Files Affected:

- (modified) flang/include/flang/Runtime/CUDA/allocator.h (+9-9) 
- (modified) flang/runtime/CUDA/allocator.cpp (+11-9) 


``````````diff

diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 8f5204769d7aa..309582c296969 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -25,19 +25,19 @@
 
 namespace Fortran::runtime::cuda {
 
-void CUFRegisterAllocator();
+RT_API_ATTRS void CUFRegisterAllocator();
 
-void *CUFAllocPinned(std::size_t);
-void CUFFreePinned(void *);
+RT_API_ATTRS void *CUFAllocPinned(std::size_t);
+RT_API_ATTRS void CUFFreePinned(void *);
 
-void *CUFAllocDevice(std::size_t);
-void CUFFreeDevice(void *);
+RT_API_ATTRS void *CUFAllocDevice(std::size_t);
+RT_API_ATTRS void CUFFreeDevice(void *);
 
-void *CUFAllocManaged(std::size_t);
-void CUFFreeManaged(void *);
+RT_API_ATTRS void *CUFAllocManaged(std::size_t);
+RT_API_ATTRS void CUFFreeManaged(void *);
 
-void *CUFAllocUnified(std::size_t);
-void CUFFreeUnified(void *);
+RT_API_ATTRS void *CUFAllocUnified(std::size_t);
+RT_API_ATTRS void CUFFreeUnified(void *);
 
 } // namespace Fortran::runtime::cuda
 #endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index cd00d40361d28..ff645e27f4f6d 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -19,7 +19,7 @@
 
 namespace Fortran::runtime::cuda {
 
-void CUFRegisterAllocator() {
+RT_API_ATTRS void CUFRegisterAllocator() {
   allocatorRegistry.Register(
       kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned});
   allocatorRegistry.Register(
@@ -30,41 +30,43 @@ void CUFRegisterAllocator() {
       kUnifiedAllocatorPos, {&CUFAllocUnified, CUFFreeUnified});
 }
 
-void *CUFAllocPinned(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocPinned(std::size_t sizeInBytes) {
   void *p;
   CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
   return p;
 }
 
-void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
+RT_API_ATTRS void CUFFreePinned(void *p) {
+  CUDA_REPORT_IF_ERROR(cuMemFreeHost(p));
+}
 
-void *CUFAllocDevice(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocDevice(std::size_t sizeInBytes) {
   CUdeviceptr p = 0;
   CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
   return reinterpret_cast<void *>(p);
 }
 
-void CUFFreeDevice(void *p) {
+RT_API_ATTRS void CUFFreeDevice(void *p) {
   CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
 }
 
-void *CUFAllocManaged(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocManaged(std::size_t sizeInBytes) {
   CUdeviceptr p = 0;
   CUDA_REPORT_IF_ERROR(
       cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
   return reinterpret_cast<void *>(p);
 }
 
-void CUFFreeManaged(void *p) {
+RT_API_ATTRS void CUFFreeManaged(void *p) {
   CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
 }
 
-void *CUFAllocUnified(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocUnified(std::size_t sizeInBytes) {
   // Call alloc managed for the time being.
   return CUFAllocManaged(sizeInBytes);
 }
 
-void CUFFreeUnified(void *p) {
+RT_API_ATTRS void CUFFreeUnified(void *p) {
   // Call free managed for the time being.
   CUFFreeManaged(p);
 }

``````````

</details>


https://github.com/llvm/llvm-project/pull/102235