[llvm-branch-commits] [flang] [flang][cuda] Add CUF allocator (PR #101216)
Valentin Clement バレンタイン クレメン via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jul 30 13:48:54 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/101216
>From 825e6efbbe20041b2b1591617f32abc12a0b42ff Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 12 Jul 2024 15:20:12 -0700
Subject: [PATCH 1/2] [flang][cuda] Add CUF allocator
---
flang/CMakeLists.txt | 7 ++
flang/include/flang/Runtime/CUDA/allocator.h | 43 +++++++++
flang/runtime/CMakeLists.txt | 3 +
flang/runtime/CUDA/CMakeLists.txt | 18 ++++
flang/runtime/CUDA/allocator.cpp | 62 +++++++++++++
flang/unittests/Runtime/CMakeLists.txt | 2 +
flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 87 +++++++++++++++++++
flang/unittests/Runtime/CUDA/CMakeLists.txt | 15 ++++
8 files changed, 237 insertions(+)
create mode 100644 flang/include/flang/Runtime/CUDA/allocator.h
create mode 100644 flang/runtime/CUDA/CMakeLists.txt
create mode 100644 flang/runtime/CUDA/allocator.cpp
create mode 100644 flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
create mode 100644 flang/unittests/Runtime/CUDA/CMakeLists.txt
diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index 070c39eb6e9ab..971e5d5c93f23 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -461,6 +461,13 @@ option(FLANG_BUILD_TOOLS
if (FLANG_BUILD_TOOLS)
add_subdirectory(tools)
endif()
+
+option(FLANG_CUF_RUNTIME
+ "Compile CUDA Fortran runtime sources" OFF)
+if (FLANG_CUF_RUNTIME)
+ find_package(CUDAToolkit REQUIRED)
+endif()
+
add_subdirectory(runtime)
if (LLVM_INCLUDE_EXAMPLES)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
new file mode 100644
index 0000000000000..0738d1e3a8bf3
--- /dev/null
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -0,0 +1,43 @@
+//===-- include/flang/Runtime/CUDA/allocator.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+#define FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
+
+#include "flang/Runtime/descriptor.h"
+
+static constexpr unsigned kPinnedAllocatorPos = 1;
+static constexpr unsigned kDeviceAllocatorPos = 2;
+static constexpr unsigned kManagedAllocatorPos = 3;
+
+#define CUDA_REPORT_IF_ERROR(expr) \
+ [](CUresult result) { \
+ if (!result) \
+ return; \
+ const char *name = nullptr; \
+ cuGetErrorName(result, &name); \
+ if (!name) \
+ name = "<unknown>"; \
+ fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+ }(expr)
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator();
+
+void *CUFAllocPinned(std::size_t);
+void CUFFreePinned(void *);
+
+void *CUFAllocDevice(std::size_t);
+void CUFFreeDevice(void *);
+
+void *CUFAllocManaged(std::size_t);
+void CUFFreeManaged(void *);
+
+} // namespace Fortran::runtime::cuf
+#endif // FORTRAN_RUNTIME_CUDA_ALLOCATOR_H_
diff --git a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt
index 1f3ae23dcbf12..4537b2d059d65 100644
--- a/flang/runtime/CMakeLists.txt
+++ b/flang/runtime/CMakeLists.txt
@@ -309,3 +309,6 @@ if (TARGET flang-new AND TARGET module_files)
add_dependencies(FortranRuntime flang-new module_files)
endif()
+if (FLANG_CUF_RUNTIME)
+ add_subdirectory(CUDA)
+endif()
diff --git a/flang/runtime/CUDA/CMakeLists.txt b/flang/runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0000000000000..e963b6062abc4
--- /dev/null
+++ b/flang/runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,18 @@
+#===-- runtime/CUDA/CMakeLists.txt -----------------------------------------===#
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+#===------------------------------------------------------------------------===#
+
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+find_library(CUDA_RUNTIME_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED)
+
+add_flang_library(CufRuntime
+ allocator.cpp
+)
+target_link_libraries(CufRuntime
+PRIVATE
+${CUDA_RUNTIME_LIBRARY}
+)
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
new file mode 100644
index 0000000000000..3c913e344335b
--- /dev/null
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -0,0 +1,62 @@
+//===-- runtime/CUDA/allocator.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/allocator.h"
+#include "../allocator-registry.h"
+#include "../derived.h"
+#include "../stat.h"
+#include "../terminator.h"
+#include "../type-info.h"
+#include "flang/Common/Fortran.h"
+#include "flang/ISO_Fortran_binding_wrapper.h"
+
+#include "cuda.h"
+
+namespace Fortran::runtime::cuf {
+
+void CUFRegisterAllocator() {
+ allocatorRegistry.Register(
+ kPinnedAllocatorPos, {&CUFAllocPinned, CUFFreePinned});
+ allocatorRegistry.Register(
+ kDeviceAllocatorPos, {&CUFAllocDevice, CUFFreeDevice});
+ allocatorRegistry.Register(
+ kManagedAllocatorPos, {&CUFAllocManaged, CUFFreeManaged});
+}
+
+void *CUFAllocPinned(std::size_t sizeInBytes) {
+ void *p = nullptr; // avoid returning an indeterminate pointer if the call fails
+ CUDA_REPORT_IF_ERROR(cuMemAllocHost(&p, sizeInBytes));
+ return p;
+}
+
+void CUFFreePinned(void *p) {
+ CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+void *CUFAllocDevice(std::size_t sizeInBytes) {
+ CUdeviceptr p = 0;
+ CUDA_REPORT_IF_ERROR(cuMemAlloc(&p, sizeInBytes));
+ return reinterpret_cast<void *>(p);
+}
+
+void CUFFreeDevice(void *p) {
+ CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+void *CUFAllocManaged(std::size_t sizeInBytes) {
+ CUdeviceptr p = 0;
+ CUDA_REPORT_IF_ERROR(
+ cuMemAllocManaged(&p, sizeInBytes, CU_MEM_ATTACH_GLOBAL));
+ return reinterpret_cast<void *>(p);
+}
+
+void CUFFreeManaged(void *p) {
+ CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+}
+
+} // namespace Fortran::runtime::cuf
diff --git a/flang/unittests/Runtime/CMakeLists.txt b/flang/unittests/Runtime/CMakeLists.txt
index ed047b08ada35..2c3f8c1a9e9ac 100644
--- a/flang/unittests/Runtime/CMakeLists.txt
+++ b/flang/unittests/Runtime/CMakeLists.txt
@@ -35,3 +35,5 @@ target_link_libraries(FlangRuntimeTests
PRIVATE
FortranRuntime
)
+
+add_subdirectory(CUDA)
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
new file mode 100644
index 0000000000000..204826d3f2a96
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -0,0 +1,87 @@
+//===-- flang/unittests/Runtime/CUDA/AllocatorCUF.cpp ------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
+#include "flang/Runtime/allocatable.h"
+
+#include "cuda.h"
+
+using namespace Fortran::runtime;
+
+static OwningPtr<Descriptor> createAllocatable(
+ Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+ return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+ CFI_attribute_allocatable);
+}
+
+thread_local static int32_t defaultDevice = 0;
+
+CUdevice getDefaultCuDevice() {
+ CUdevice device;
+ CUDA_REPORT_IF_ERROR(cuDeviceGet(&device, /*ordinal=*/defaultDevice));
+ return device;
+}
+
+class ScopedContext {
+public:
+ ScopedContext() {
+ // Static reference to CUDA primary context for device ordinal
+ // defaultDevice.
+ static CUcontext context = [] {
+ CUDA_REPORT_IF_ERROR(cuInit(/*flags=*/0));
+ CUcontext ctx;
+ // Note: this does not affect the current context.
+ CUDA_REPORT_IF_ERROR(
+ cuDevicePrimaryCtxRetain(&ctx, getDefaultCuDevice()));
+ return ctx;
+ }();
+
+ CUDA_REPORT_IF_ERROR(cuCtxPushCurrent(context));
+ }
+
+ ~ScopedContext() { CUDA_REPORT_IF_ERROR(cuCtxPopCurrent(nullptr)); }
+};
+
+TEST(AllocatableCUFTest, SimpleDeviceAllocate) {
+ using Fortran::common::TypeCategory;
+ Fortran::runtime::cuf::CUFRegisterAllocator();
+ ScopedContext ctx;
+ // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+ auto a{createAllocatable(TypeCategory::Real, 4)};
+ a->raw().SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, a->raw().GetAllocIdx());
+ EXPECT_FALSE(a->raw().HasAddendum());
+ RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(a->IsAllocated());
+ RTNAME(AllocatableDeallocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_FALSE(a->IsAllocated());
+}
+
+TEST(AllocatableCUFTest, SimplePinnedAllocate) {
+ using Fortran::common::TypeCategory;
+ Fortran::runtime::cuf::CUFRegisterAllocator();
+ ScopedContext ctx;
+ // INTEGER(4), PINNED, ALLOCATABLE :: a(:)
+ auto a{createAllocatable(TypeCategory::Integer, 4)};
+ EXPECT_FALSE(a->raw().HasAddendum());
+ a->raw().SetAllocIdx(kPinnedAllocatorPos);
+ EXPECT_EQ((int)kPinnedAllocatorPos, a->raw().GetAllocIdx());
+ EXPECT_FALSE(a->raw().HasAddendum());
+ RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(a->IsAllocated());
+ RTNAME(AllocatableDeallocate)
+ (*a, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_FALSE(a->IsAllocated());
+}
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
new file mode 100644
index 0000000000000..14b5c788719b8
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -0,0 +1,15 @@
+if (FLANG_CUF_RUNTIME)
+
+add_flang_unittest(FlangCufRuntimeTests
+ AllocatorCUF.cpp
+)
+
+target_link_libraries(FlangCufRuntimeTests
+ PRIVATE
+ CufRuntime
+ FortranRuntime
+)
+
+target_include_directories(FlangCufRuntimeTests PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
+
+endif()
>From 86ce320d39a6ea0fd25fad592453bdc2033e6103 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 30 Jul 2024 13:48:42 -0700
Subject: [PATCH 2/2] Use Terminator and switch pinned deallocator to
cuMemFreeHost
---
flang/include/flang/Runtime/CUDA/allocator.h | 3 ++-
flang/runtime/CUDA/allocator.cpp | 4 ++--
flang/unittests/Runtime/CUDA/AllocatorCUF.cpp | 1 +
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 0738d1e3a8bf3..9f6fb55bea744 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -23,7 +23,8 @@ static constexpr unsigned kManagedAllocatorPos = 3;
cuGetErrorName(result, &name); \
if (!name) \
name = "<unknown>"; \
- fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
+ Terminator terminator{__FILE__, __LINE__}; \
+ terminator.Crash("'%s' failed with '%s'", #expr, name); \
}(expr)
namespace Fortran::runtime::cuf {
diff --git a/flang/runtime/CUDA/allocator.cpp b/flang/runtime/CUDA/allocator.cpp
index 3c913e344335b..899532a1a5e1c 100644
--- a/flang/runtime/CUDA/allocator.cpp
+++ b/flang/runtime/CUDA/allocator.cpp
@@ -6,11 +6,11 @@
//
//===----------------------------------------------------------------------===//
+#include "../terminator.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "../allocator-registry.h"
#include "../derived.h"
#include "../stat.h"
-#include "../terminator.h"
#include "../type-info.h"
#include "flang/Common/Fortran.h"
#include "flang/ISO_Fortran_binding_wrapper.h"
@@ -35,7 +35,7 @@ void *CUFAllocPinned(std::size_t sizeInBytes) {
}
void CUFFreePinned(void *p) {
- CUDA_REPORT_IF_ERROR(cuMemFree(reinterpret_cast<CUdeviceptr>(p)));
+ CUDA_REPORT_IF_ERROR(cuMemFreeHost(p));
}
void *CUFAllocDevice(std::size_t sizeInBytes) {
diff --git a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
index 204826d3f2a96..caa62be6aa921 100644
--- a/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
+++ b/flang/unittests/Runtime/CUDA/AllocatorCUF.cpp
@@ -10,6 +10,7 @@
#include "flang/Common/Fortran.h"
#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/allocatable.h"
+#include "../../../runtime/terminator.h"
#include "cuda.h"
More information about the llvm-branch-commits
mailing list