[flang-commits] [flang] 1417633 - [flang][cuda] Add CUF allocator (#101216)

Fri Aug 2 10:02:39 PDT 2024

Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-08-02T10:02:34-07:00
New Revision: 1417633943b77365bda70b1ddddd46a0a3c05300

URL: https://github.com/llvm/llvm-project/commit/1417633943b77365bda70b1ddddd46a0a3c05300
DIFF: https://github.com/llvm/llvm-project/commit/1417633943b77365bda70b1ddddd46a0a3c05300.diff

LOG: [flang][cuda] Add CUF allocator (#101216)

Add allocators for CUDA Fortran allocation on the device. Three allocators
are added for pinned, device and managed/unified memory allocation.
`CUFRegisterAllocator()` is called to register the allocators in the
allocator registry added in #100690.


Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to
conditionally build these.

Added: 
    flang/include/flang/Runtime/CUDA/allocator.h
    flang/runtime/CUDA/CMakeLists.txt
    flang/runtime/CUDA/allocator.cpp
    flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
    flang/unittests/Runtime/CUDA/CMakeLists.txt

Modified: 
    flang/CMakeLists.txt
    flang/runtime/CMakeLists.txt
    flang/unittests/Runtime/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644

--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
 if (FLANG_BUILD_TOOLS)
   add_subdirectory(tools)
 endif()
+
+# Opt-in build of the CUDA Fortran runtime sources; requires the CUDA toolkit.
+option(FLANG_CUF_RUNTIME "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+  find_package(CUDAToolkit REQUIRED)
+endif()
+
 add_subdirectory(runtime)
 
 if (LLVM_INCLUDE_EXAMPLES)

diff  --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0000000000000..9f6fb55bea744
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,56 @@
+//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+// Positions in the allocator registry at which CUFRegisterAllocator()
+// registers the pinned, device and managed allocators.
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+// Evaluates `expr`, which must yield a CUresult, and returns normally when the
+// result is zero. Otherwise crashes through Fortran::runtime::Terminator with
+// the stringified expression and the name reported by cuGetErrorName (or
+// "<unknown>"). The includer must provide the CUDA driver API declarations
+// (cuda.h) and the runtime's terminator.h. Terminator is fully qualified so
+// that the expansion does not depend on the namespace at the point of use.
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+    if (!result) \
+      return; \
+    const char *name = nullptr; \
+    cuGetErrorName(result, &name); \
+    if (!name) \
+      name = "<unknown>"; \
+    Fortran::runtime::Terminator terminator{__FILE__, __LINE__}; \
+    terminator.Crash("'%s' failed with '%s'", #expr, name); \
+  }(expr)
+
+namespace Fortran::runtime::cuf {
+
+// Registers the three CUF allocators in the runtime allocator registry.
+void CUFRegisterAllocator();
+
+// Pinned memory allocation/deallocation (cuMemAllocHost / cuMemFreeHost).
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+// Device memory allocation/deallocation (cuMemAlloc / cuMemFree).
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+// Managed memory allocation/deallocation (cuMemAllocManaged / cuMemFree).
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_

diff  --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,7 @@ if (TARGET flang-new AND TARGET module_files)
   add_dependencies(FortranRuntime flang-new module_files)
 endif()
 
+# Build the CUDA Fortran runtime sources only when FLANG_CUF_RUNTIME is set.
+if (FLANG_CUF_RUNTIME)
+  add_subdirectory(CUDA)
+endif()

diff  --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0000000000000..de1104f07ce6c
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,30 @@
+#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+
+# Builds CufRuntime, the CUDA Fortran allocator library. This directory is only
+# added when FLANG_CUF_RUNTIME is ON, after find_package(CUDAToolkit REQUIRED)
+# has run in flang/CMakeLists.txt.
+
+# NOTE(review): left directory-scoped on purpose. add_flang_library may compile
+# the sources through a separate object-library target, in which case a
+# target-scoped include directory on CufRuntime would not reach the compile
+# step -- confirm before converting to target_include_directories.
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+
+add_flang_library(CufRuntime
+  allocator.cpp
+)
+
+# allocator.cpp calls the CUDA driver API (cuMemAlloc, cuMemFree, ...), so link
+# the driver library through the imported target that FindCUDAToolkit provides
+# rather than searching for `cuda` by hand.
+target_link_libraries(CufRuntime
+  PRIVATE
+  FortranRuntime
+  CUDA::cuda_driver
+)

diff  --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0000000000000..02eaba5636990
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,73 @@
+//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../allocator-registry.h"
+#include "../derived.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "../type-info.h"
+#include "flang/Common/Fortran.h"
+#include "flang/ISO_Fortran_binding_wrapper.h"
+
+#include "cuda.h"
+
+namespace Fortran::runtime::cuf {
+
+// Registers the pinned, device and managed allocate/free pairs in the
+// allocator registry at their respective k*AllocatorPos positions.
+void CUFRegisterAllocator() {
+  allocatorRegistry.Register(
+      kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned});
+  allocatorRegistry.Register(
+      kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice});
+  allocatorRegistry.Register(
+      kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged});
+}
+
+// Allocates sizeInBytes bytes with cuMemAllocHost. A CUDA error crashes the
+// program through CUDA_REPORT_IF_ERROR.
+void *CUFAllocPinned(std::size_t sizeInBytes) {
+  // Initialized like the other allocators so the pointer is never
+  // indeterminate.
+  void *p = nullptr;
+  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
+  return p;
+}
+
+// Releases memory obtained from CUFAllocPinned with cuMemFreeHost.
+void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cuMemFreeHost(p)); }
+
+// Allocates sizeInBytes bytes with cuMemAlloc and returns the resulting
+// CUdeviceptr reinterpreted as void *.
+void *CUFAllocDevice(std::size_t sizeInBytes) {
+  CUdeviceptr p = 0;
+  CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
+  return reinterpret_cast<void *>(p);
+}
+
+// Releases memory obtained from CUFAllocDevice with cuMemFree.
+void CUFFreeDevice(void *p) {
+  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+// Allocates sizeInBytes bytes with cuMemAllocManaged using
+// CU_MEM_ATTACH_GLOBAL and returns the CUdeviceptr reinterpreted as void *.
+void *CUFAllocManaged(std::size_t sizeInBytes) {
+  CUdeviceptr p = 0;
+  CUDA_REPORT_IF_ERROR(
+      cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
+  return reinterpret_cast<void *>(p);
+}
+
+// Releases memory obtained from CUFAllocManaged with cuMemFree.
+void CUFFreeManaged(void *p) {
+  CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+} // namespace Fortran::runtime::cuf

diff  --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt
index ed047b08ada35..2c3f8c1a9e9ac 100644
--- a/flang/unittests/Runtime/CMakeLists.txt
+++ b/flang/unittests/Runtime/CMakeLists.txt
@@ -35,3 +35,6 @@ target_link_libraries(FlangRuntimeTests
   PRIVATE
   FortranRuntime
 )
+
+# CUDA/CMakeLists.txt guards its own contents with FLANG_CUF_RUNTIME.
+add_subdirectory(CUDA)

diff  --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
new file mode 100644
index 0000000000000..2a7c7fe25de85
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -0,0 +1,99 @@
+//===-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp ------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
+#include "flang/Runtime/allocatable.h"
+
+#include "cuda.h"
+
+using namespace Fortran::runtime;
+
+// Builds a descriptor with the CFI_attribute_allocatable attribute through
+// Descriptor::Create for the given type category and kind; rank defaults to 1.
+static OwningPtr<Descriptor> createAllocatable(
+    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+  return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+      CFI_attribute_allocatable);
+}
+
+// Ordinal of the CUDA device used by getDefaultCuDevice().
+thread_local static int32_t defaultDevice = 0;
+
+// Returns the CUdevice for ordinal defaultDevice; a cuDeviceGet failure
+// crashes through CUDA_REPORT_IF_ERROR.
+CUdevice getDefaultCuDevice() {
+  CUdevice device;
+  CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
+  return device;
+}
+
+// Pushes the primary context of the default device as the current CUDA
+// context on construction and pops the current context on destruction.
+class ScopedContext {
+public:
+  ScopedContext() {
+    // Static reference to CUDA primary context for device ordinal
+    // defaultDevice.
+    static CUcontext context = [] {
+      CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
+      CUcontext ctx;
+      // Note: this does not affect the current context.
+      CUDA_REPORT_IF_ERROR(
+          cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
+      return ctx;
+    }();
+
+    CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
+  }
+
+  ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
+};
+
+// Allocates and then deallocates a REAL(4) allocatable whose descriptor
+// selects the allocator registered at kDeviceAllocatorPos.
+TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
+  using Fortran::common::TypeCategory;
+  Fortran::runtime::cuf::CUFRegisterAllocator();
+  ScopedContext ctx;
+  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+  auto a{createAllocatable(TypeCategory::Real, 4)};
+  a->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
+  EXPECT_FALSE(a->HasAddendum());
+  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(a->IsAllocated());
+  RTNAME(AllocatableDeallocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_FALSE(a->IsAllocated());
+}
+
+// Allocates and then deallocates an INTEGER(4) allocatable whose descriptor
+// selects the allocator registered at kPinnedAllocatorPos.
+TEST(AllocatableCUFTest, SimplePinnedAllocate) {
+  using Fortran::common::TypeCategory;
+  Fortran::runtime::cuf::CUFRegisterAllocator();
+  ScopedContext ctx;
+  // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
+  auto a{createAllocatable(TypeCategory::Integer, 4)};
+  EXPECT_FALSE(a->HasAddendum());
+  a->SetAllocIdx(kPinnedAllocatorPos);
+  EXPECT_EQ((int)kPinnedAllocatorPos, a->GetAllocIdx());
+  EXPECT_FALSE(a->HasAddendum());
+  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+  RTNAME(AllocatableAllocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(a->IsAllocated());
+  RTNAME(AllocatableDeallocate)
+  (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_FALSE(a->IsAllocated());
+}

diff  --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0000000000000..14b5c788719b8
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Unit tests for the CUDA Fortran runtime; skipped unless it is being built.
+if (NOT FLANG_CUF_RUNTIME)
+  return()
+endif()
+
+add_flang_unittest(FlangCufRuntimeTests
+  AllocatorCUF.cpp
+)
+
+target_include_directories(FlangCufRuntimeTests
+  PRIVATE
+  ${CUDAToolkit_INCLUDE_DIRS}
+)
+
+target_link_libraries(FlangCufRuntimeTests
+  PRIVATE
+  CufRuntime
+  FortranRuntime
+)