[flang-commits] [flang] [flang][cuda] Defined allocator for unified data (PR #102189)

Tue Aug 6 10:49:05 PDT 2024

https://github.com/clementval created https://github.com/llvm/llvm-project/pull/102189

CUDA unified variables were set to use the same allocator as managed variables. This patch adds a specific allocator for unified variables. Currently it calls the managed allocator underneath, but we want to have the flexibility to change that in the future.

>From a8497fa5257482278bb90a8e33ef7321fd0e2143 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 6 Aug 2024 10:47:18 -0700
Subject: [PATCH] [flang][cuda] Defined allocator for unified data

---
 flang/include/flang/Runtime/CUDA/allocator.h     |  3 +++
 flang/include/flang/Runtime/allocator-registry.h |  3 ++-
 flang/lib/Lower/ConvertVariable.cpp              |  5 +++--
 flang/runtime/CUDA/allocator.cpp                 | 12 ++++++++++++
 4 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 46ff5dbe2f3853..70729c3d9f1888 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -36,5 +36,8 @@ void CUFFreeDevice(void *);
 void *CUFAllocManaged(std::size_t);
 void CUFFreeManaged(void *);
 
+void *CUFAllocUnified(std::size_t);
+void CUFFreeUnified(void *);
+
 } // namespace Fortran::runtime::cuf
 #endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/include/flang/Runtime/allocator-registry.h b/flang/include/flang/Runtime/allocator-registry.h
index 209b4d2e44e9b7..acfada506fafc6 100644
--- a/flang/include/flang/Runtime/allocator-registry.h
+++ b/flang/include/flang/Runtime/allocator-registry.h
@@ -19,8 +19,9 @@ static constexpr unsigned kDefaultAllocator = 0;
 static constexpr unsigned kPinnedAllocatorPos = 1;
 static constexpr unsigned kDeviceAllocatorPos = 2;
 static constexpr unsigned kManagedAllocatorPos = 3;
+static constexpr unsigned kUnifiedAllocatorPos = 4;
 
-#define MAX_ALLOCATOR 5
+#define MAX_ALLOCATOR 7 // 3 bits are reserved in the descriptor.
 
 namespace Fortran::runtime {
 
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 45389091b8164d..ffbbea238647ce 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -1860,9 +1860,10 @@ static unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) {
       return kPinnedAllocatorPos;
     if (*cudaAttr == Fortran::common::CUDADataAttr::Device)
       return kDeviceAllocatorPos;
-    if (*cudaAttr == Fortran::common::CUDADataAttr::Managed ||
-        *cudaAttr == Fortran::common::CUDADataAttr::Unified)
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Managed)
       return kManagedAllocatorPos;
+    if (*cudaAttr == Fortran::common::CUDADataAttr::Unified)
+      return kUnifiedAllocatorPos;
   }
   return kDefaultAllocator;
 }
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index 26a3c296962690..5292dd54322bd8 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -26,6 +26,8 @@ void CUFRegisterAllocator() {
       kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice});
   allocatorRegistry.Register(
       kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged});
+  allocatorRegistry.Register(
+      kUnifiedAllocatorPos, {&CUFAllocUnified, CUFFreeUnified});
 }
 
 void *CUFAllocPinned(std::size_t sizeInBytes) {
@@ -57,4 +59,14 @@ void CUFFreeManaged(void *p) {
   CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
 }
 
+void *CUFAllocUnified(std::size_t sizeInBytes) {
+  // Call alloc managed for the time being.
+  return CUFAllocManaged(sizeInBytes);
+}
+
+void CUFFreeUnified(void *p) {
+  // Call free managed for the time being.
+  CUFFreeManaged(p);
+}
+
 } // namespace Fortran::runtime::cuf