[flang-commits] [flang] 1417633 - [flang][cuda] Add CUF allocator (#101216)
via flang-commits
flang-commits at lists.llvm.org
Fri Aug 2 10:02:39 PDT 2024
Author: Valentin Clement (バレンタイン クレメン)
Date: 2024-08-02T10:02:34-07:00
New Revision: 1417633943b77365bda70b1ddddd46a0a3c05300
URL: https://github.com/llvm/llvm-project/commit/1417633943b77365bda70b1ddddd46a0a3c05300
DIFF: https://github.com/llvm/llvm-project/commit/1417633943b77365bda70b1ddddd46a0a3c05300.diff
LOG: [flang][cuda] Add CUF allocator (#101216)
Add allocators for CUDA Fortran allocation on the device. Three allocators
are added for pinned, device and managed/unified memory allocation.
`CUFRegisterAllocator()` is called to register the allocators in the
allocator registry added in #100690.
Since this requires CUDA, a CMake option `FLANG_CUF_RUNTIME` is added to
conditionally build these.
Added:
flang/include/flang/Runtime/CUDA/allocator.h
flang/runtime/CUDA/CMakeLists.txt
flang/runtime/CUDA/allocator.cpp
flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
flang/unittests/Runtime/CUDA/CMakeLists.txt
Modified:
flang/CMakeLists.txt
flang/runtime/CMakeLists.txt
flang/unittests/Runtime/CMakeLists.txt
Removed:
################################################################################
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()
+
+# Opt-in switch (default OFF) for building the CUDA Fortran runtime sources.
+# When it is enabled the CUDA toolkit is mandatory: find_package(... REQUIRED)
+# aborts configuration if it cannot be located.
+option(FLANG_CUF_RUNTIME
+ "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+ find_package(CUDAToolkit REQUIRED)
+endif()
+
add_subdirectory(runtime)
if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0000000000000..9f6fb55bea744
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,44 @@
+//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+// Positions under which the CUDA Fortran allocators are registered in the
+// runtime allocator registry by CUFRegisterAllocator(): one each for pinned,
+// device and managed memory.
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+// Evaluates `expr`, a CUDA driver API call yielding a CUresult, and returns
+// normally when it succeeded. Otherwise the CUDA error name is looked up
+// ("<unknown>" if the lookup yields nothing) and the program is crashed
+// through the runtime Terminator with a message containing the text of the
+// failing expression and that name.
+//
+// Terminator is spelled with its full namespace so that the macro can be
+// expanded from any namespace, not only from inside Fortran::runtime or after
+// a using-directive for it.
+// NOTE(review): this header includes neither cuda.h nor the runtime's
+// terminator.h; every visible user includes both before expanding the macro.
+#define CUDA_REPORT_IF_ERROR(expr) \
+  [](CUresult result) { \
+    if (result == CUDA_SUCCESS) \
+      return; \
+    const char *name = nullptr; \
+    cuGetErrorName(result, &name); \
+    if (!name) \
+      name = "<unknown>"; \
+    ::Fortran::runtime::Terminator terminator{__FILE__, __LINE__}; \
+    terminator.Crash("'%s' failed with '%s'", #expr, name); \
+  }(expr)
+
+// Entry points of the CUDA Fortran (CUF) allocators. Each allocation function
+// takes a size in bytes and returns the allocated pointer; each free function
+// takes a pointer previously returned by the matching allocation function.
+namespace Fortran::runtime::cuf {
+
+// Registers the three allocate/free pairs below in the runtime allocator
+// registry at kPinnedAllocatorPos, kDeviceAllocatorPos and
+// kManagedAllocatorPos.
+void CUFRegisterAllocator();
+
+// Pinned host memory (cuMemAllocHost / cuMemFreeHost).
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+// Device memory (cuMemAlloc / cuMemFree).
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+// Managed memory (cuMemAllocManaged / cuMemFree).
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
add_dependencies(FortranRuntime flang-new module_files)
endif()
+# The CUDA Fortran runtime library is only built when FLANG_CUF_RUNTIME is ON.
+if (FLANG_CUF_RUNTIME)
+ add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0000000000000..de1104f07ce6c
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,19 @@
+#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+
+# Make the CUDA toolkit headers visible to the sources of this directory
+# (allocator.cpp includes cuda.h).
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+# NOTE(review): despite the variable name, this looks up the library called
+# `cuda`; allocator.cpp only calls cu* driver API functions, so this is
+# presumably the CUDA driver library rather than the CUDA runtime (cudart).
+# Configuration fails if it is not found (REQUIRED).
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+# Library holding the CUDA Fortran allocators.
+add_flang_library(CufRuntime
+ allocator.cpp
+)
+target_link_libraries(CufRuntime
+ PRIVATE
+ FortranRuntime
+ ${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0000000000000..02eaba5636990
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,60 @@
+//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../allocator-registry.h"
+#include "../derived.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "../type-info.h"
+#include "flang/Common/Fortran.h"
+#include "flang/ISO_Fortran_binding_wrapper.h"
+
+#include "cuda.h"
+
+namespace Fortran::runtime::cuf {
+
+// Installs the pinned, device and managed allocate/free function pairs in the
+// runtime allocator registry, each at its dedicated k*AllocatorPos position.
+void CUFRegisterAllocator() {
+  allocatorRegistry.Register(
+      kPinnedAllocatorPos, {&CUFAllocPinned, &CUFFreePinned});
+  allocatorRegistry.Register(
+      kDeviceAllocatorPos, {&CUFAllocDevice, &CUFFreeDevice});
+  allocatorRegistry.Register(
+      kManagedAllocatorPos, {&CUFAllocManaged, &CUFFreeManaged});
+}
+
+// Allocates `sizeInBytes` bytes of pinned host memory with cuMemAllocHost and
+// returns the resulting pointer. A failing CUDA call crashes the program via
+// CUDA_REPORT_IF_ERROR.
+void *CUFAllocPinned(std::size_t sizeInBytes) {
+  // Start from a known value, as the device and managed allocators do, instead
+  // of passing the address of an indeterminate pointer to the driver.
+  void *p{nullptr};
+  CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
+  return p;
+}
+
+// Releases memory obtained from CUFAllocPinned with cuMemFreeHost; a failing
+// CUDA call crashes the program via CUDA_REPORT_IF_ERROR.
+void CUFFreePinned(void *p) {
+  CUDA_REPORT_IF_ERROR(cuMemFreeHost(p));
+}
+
+// Allocates `sizeInBytes` bytes of device memory with cuMemAlloc and returns
+// the device address as a void pointer. A failing CUDA call crashes the
+// program via CUDA_REPORT_IF_ERROR.
+void *CUFAllocDevice(std::size_t sizeInBytes) {
+  CUdeviceptr p{0};
+  CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
+  // CUdeviceptr is an integer handle; the registry deals in void pointers.
+  return reinterpret_cast<void *>(p);
+}
+
+// Releases device memory obtained from CUFAllocDevice with cuMemFree, after
+// converting the pointer back to a CUdeviceptr. A failing CUDA call crashes
+// the program via CUDA_REPORT_IF_ERROR.
+void CUFFreeDevice(void *p) {
+ CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+// Allocates `sizeInBytes` bytes of managed memory with cuMemAllocManaged,
+// using the CU_MEM_ATTACH_GLOBAL attach flag, and returns the address as a
+// void pointer. A failing CUDA call crashes the program via
+// CUDA_REPORT_IF_ERROR.
+void *CUFAllocManaged(std::size_t sizeInBytes) {
+  CUdeviceptr p{0};
+  CUDA_REPORT_IF_ERROR(
+      cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
+  // CUdeviceptr is an integer handle; the registry deals in void pointers.
+  return reinterpret_cast<void *>(p);
+}
+
+// Releases managed memory obtained from CUFAllocManaged. Like CUFFreeDevice it
+// goes through cuMemFree; a failing CUDA call crashes the program via
+// CUDA_REPORT_IF_ERROR.
+void CUFFreeManaged(void *p) {
+ CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+} // namespace Fortran::runtime::cuf
diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt
index ed047b08ada35..2c3f8c1a9e9ac 100644
--- a/flang/unittests/Runtime/CMakeLists.txt
+++ b/flang/unittests/Runtime/CMakeLists.txt
@@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests
PRIVATE
FortranRuntime
)
+
+# Entered unconditionally; CUDA/CMakeLists.txt guards its own contents with
+# FLANG_CUF_RUNTIME.
+add_subdirectory(CUDA)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
new file mode 100644
index 0000000000000..2a7c7fe25de85
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -0,0 +1,88 @@
+//===-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp ------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
+#include "flang/Runtime/allocatable.h"
+
+#include "cuda.h"
+
+using namespace Fortran::runtime;
+
+// Builds a descriptor with the allocatable attribute for an object of type
+// category `tc` and kind `kind`, with `rank` dimensions (one by default). No
+// base address and no extents are supplied.
+static OwningPtr<Descriptor> createAllocatable(
+    Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+  TypeCode code{tc, kind};
+  // `kind` is also passed as the second argument of Descriptor::Create.
+  // NOTE(review): presumably the element size in bytes, which matches the kind
+  // for the REAL(4)/INTEGER(4) cases used in this file - confirm.
+  return Descriptor::Create(
+      code, kind, nullptr, rank, nullptr, CFI_attribute_allocatable);
+}
+
+// Ordinal of the CUDA device used by the tests in this file.
+thread_local static int32_t defaultDevice = 0;
+
+// Returns the CUdevice handle for ordinal `defaultDevice`, obtained with
+// cuDeviceGet. A failing CUDA call crashes the program via
+// CUDA_REPORT_IF_ERROR.
+CUdevice getDefaultCuDevice() {
+ CUdevice device;
+ CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
+ return device;
+}
+
+// Scope guard for the tests: the constructor pushes the primary CUDA context
+// of the default device onto the calling thread's context stack and the
+// destructor pops it again. Any failing CUDA call crashes the program via
+// CUDA_REPORT_IF_ERROR.
+class ScopedContext {
+public:
+  ScopedContext() {
+    // Static reference to CUDA primary context for device ordinal
+    // defaultDevice. It is set up once, by the first ScopedContext that is
+    // constructed (cuInit + cuDevicePrimaryCtxRetain); no matching release is
+    // performed in this file.
+    static CUcontext context = [] {
+      CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
+      CUcontext ctx{nullptr};
+      // Note: this does not affect the current context.
+      CUDA_REPORT_IF_ERROR(
+          cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
+      return ctx;
+    }();
+
+    CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
+  }
+
+  // Every instance stands for exactly one push; a copy would pop a second
+  // time in its destructor, so copying (and thereby moving) is disabled.
+  ScopedContext(const ScopedContext &) = delete;
+  ScopedContext &operator=(const ScopedContext &) = delete;
+
+  ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
+};
+
+// Allocates and deallocates an allocatable whose descriptor selects the device
+// allocator, checking the allocation status after each step.
+TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
+ using Fortran::common::TypeCategory;
+ // Make the CUF allocators available in the registry and a CUDA context
+ // current for the duration of the test.
+ Fortran::runtime::cuf::CUFRegisterAllocator();
+ ScopedContext ctx;
+ // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+ auto a{createAllocatable(TypeCategory::Real, 4)};
+ // Route this descriptor's allocation through the device allocator.
+ a->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
+ EXPECT_FALSE(a->HasAddendum());
+ // NOTE(review): presumably (dimension 0, lower bound 1, upper bound 10).
+ RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(a->IsAllocated());
+ RTNAME(AllocatableDeallocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_FALSE(a->IsAllocated());
+}
+
+// Allocates and deallocates an allocatable whose descriptor selects the pinned
+// allocator, checking the allocation status after each step.
+TEST(AllocatableCUFTest, SimplePinnedAllocate) {
+ using Fortran::common::TypeCategory;
+ // Make the CUF allocators available in the registry and a CUDA context
+ // current for the duration of the test.
+ Fortran::runtime::cuf::CUFRegisterAllocator();
+ ScopedContext ctx;
+ // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
+ auto a{createAllocatable(TypeCategory::Integer, 4)};
+ // The addendum check is made both before and after selecting the allocator.
+ EXPECT_FALSE(a->HasAddendum());
+ // Route this descriptor's allocation through the pinned allocator.
+ a->SetAllocIdx(kPinnedAllocatorPos);
+ EXPECT_EQ((int)kPinnedAllocatorPos, a->GetAllocIdx());
+ EXPECT_FALSE(a->HasAddendum());
+ // NOTE(review): presumably (dimension 0, lower bound 1, upper bound 10).
+ RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(a->IsAllocated());
+ RTNAME(AllocatableDeallocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_FALSE(a->IsAllocated());
+}
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0000000000000..14b5c788719b8
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Unit tests for the CUDA Fortran runtime; only defined when the CUF runtime
+# itself is being built.
+if (FLANG_CUF_RUNTIME)
+
+add_flang_unittest(FlangCufRuntimeTests
+ AllocatorCUF.cpp
+)
+
+# The tests call into both the CUF allocators and the core Fortran runtime.
+target_link_libraries(FlangCufRuntimeTests
+ PRIVATE
+ CufRuntime
+ FortranRuntime
+)
+
+# AllocatorCUF.cpp includes cuda.h directly.
+target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
+
+endif()
More information about the flang-commits
mailing list