[flang-commits] [flang] [flang][cuda] Add RT_API_ATTRS to allocator functions (PR #102235)
via flang-commits
flang-commits at lists.llvm.org
Tue Aug 6 14:40:22 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-runtime
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
Allocators might be called from device code. Add RT_API_ATTRS so they can be built for the device.
---
Full diff: https://github.com/llvm/llvm-project/pull/102235.diff
2 Files Affected:
- (modified) flang/include/flang/Runtime/CUDA/allocator.h (+9-9)
- (modified) flang/runtime/CUDA/allocator.cpp (+11-9)
``````````diff
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 8f5204769d7aa..309582c296969 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -25,19 +25,19 @@
namespace Fortran::runtime::cuda {
-void CUFRegisterAllocator();
+RT_API_ATTRS void CUFRegisterAllocator();
-void *CUFAllocPinned(std::size_t);
-void CUFFreePinned(void *);
+RT_API_ATTRS void *CUFAllocPinned(std::size_t);
+RT_API_ATTRS void CUFFreePinned(void *);
-void *CUFAllocDevice(std::size_t);
-void CUFFreeDevice(void *);
+RT_API_ATTRS void *CUFAllocDevice(std::size_t);
+RT_API_ATTRS void CUFFreeDevice(void *);
-void *CUFAllocManaged(std::size_t);
-void CUFFreeManaged(void *);
+RT_API_ATTRS void *CUFAllocManaged(std::size_t);
+RT_API_ATTRS void CUFFreeManaged(void *);
-void *CUFAllocUnified(std::size_t);
-void CUFFreeUnified(void *);
+RT_API_ATTRS void *CUFAllocUnified(std::size_t);
+RT_API_ATTRS void CUFFreeUnified(void *);
} // namespace Fortran::runtime::cuda
#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index cd00d40361d28..ff645e27f4f6d 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -19,7 +19,7 @@
namespace Fortran::runtime::cuda {
-void CUFRegisterAllocator() {
+RT_API_ATTRS void CUFRegisterAllocator() {
allocatorRegistry.Register(
kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned});
allocatorRegistry.Register(
@@ -30,41 +30,43 @@ void CUFRegisterAllocator() {
kUnifiedAllocatorPos, {&CUFAllocUnified, CUFFreeUnified});
}
-void *CUFAllocPinned(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocPinned(std::size_t sizeInBytes) {
void *p;
CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
return p;
}
-void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
+RT_API_ATTRS void CUFFreePinned(void *p) {
+ CUDA_REPORT_IF_ERROR(cuMemFreeHost(p));
+}
-void *CUFAllocDevice(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocDevice(std::size_t sizeInBytes) {
CUdeviceptr p = 0;
CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
return reinterpret_cast<void *>(p);
}
-void CUFFreeDevice(void *p) {
+RT_API_ATTRS void CUFFreeDevice(void *p) {
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}
-void *CUFAllocManaged(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocManaged(std::size_t sizeInBytes) {
CUdeviceptr p = 0;
CUDA_REPORT_IF_ERROR(
cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
return reinterpret_cast<void *>(p);
}
-void CUFFreeManaged(void *p) {
+RT_API_ATTRS void CUFFreeManaged(void *p) {
CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
}
-void *CUFAllocUnified(std::size_t sizeInBytes) {
+RT_API_ATTRS void *CUFAllocUnified(std::size_t sizeInBytes) {
// Call alloc managed for the time being.
return CUFAllocManaged(sizeInBytes);
}
-void CUFFreeUnified(void *p) {
+RT_API_ATTRS void CUFFreeUnified(void *p) {
// Call free managed for the time being.
CUFFreeManaged(p);
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/102235
More information about the flang-commits
mailing list