[flang-commits] [flang] [flang][cuda] Add entry point for alloc/free and simple copy (PR #109867)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Tue Sep 24 17:33:14 PDT 2024
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/109867
>From 8e4611e5dbeda288432ba53a61bfe1452cedfa93 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 14:21:31 -0700
Subject: [PATCH 1/5] [flang][cuda] Add entry point for alloc/free and simple
copy
---
flang/include/flang/Runtime/CUDA/memory.h | 12 +++++++++
flang/runtime/CUDA/memory.cpp | 29 ++++++++++++++++++++
flang/unittests/Runtime/CUDA/CMakeLists.txt | 1 +
flang/unittests/Runtime/CUDA/Memory.cpp | 30 +++++++++++++++++++++
4 files changed, 72 insertions(+)
create mode 100644 flang/unittests/Runtime/CUDA/Memory.cpp
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 33947248dc4831..2fc28ed1567b89 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -17,12 +17,24 @@ namespace Fortran::runtime::cuda {
extern "C" {
+/// Allocate memory on the device.
+void *RTDECL(CUFMemAlloc)(
+ std::size_t bytes, const char *sourceFile = nullptr, int sourceLine = 0);
+
+/// Free memory allocated on the device.
+void RTDECL(CUFMemFree)(
+ void *devicePtr, const char *sourceFile = nullptr, int sourceLine = 0);
+
/// Set value to the data hold by a descriptor. The \p value pointer must be
/// addressable to the same amount of bytes specified by the element size of
/// the descriptor \p desc.
void RTDECL(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
const char *sourceFile = nullptr, int sourceLine = 0);
+/// Data transfer from a pointer to a pointer.
+void RTDECL(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+ unsigned mode, const char *sourceFile = nullptr, int sourceLine = 0);
+
/// Data transfer from a pointer to a descriptor.
void RTDECL(CUFDataTransferDescPtr)(const Descriptor &dst, void *src,
std::size_t bytes, unsigned mode, const char *sourceFile = nullptr,
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index a287fa14a48789..171203a93be2a9 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -8,12 +8,25 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
+#include "flang/Runtime/CUDA/common.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
extern "C" {
+void *RTDEF(CUFMemAlloc)(
+ std::size_t bytes, const char *sourceFile, int sourceLine) {
+ void *ptr;
+ if (bytes != 0)
+ CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+ return ptr;
+}
+
+void RTDEF(CUFMemFree)(void *ptr, const char *sourceFile, int sourceLine) {
+ CUDA_REPORT_IF_ERROR(cudaFree(ptr));
+}
+
void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
@@ -21,6 +34,22 @@ void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
"value to a descriptor");
}
+void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
+ unsigned mode, const char *sourceFile, int sourceLine) {
+ cudaMemcpyKind kind;
+ if (mode == kHostToDevice) {
+ kind = cudaMemcpyHostToDevice;
+ } else if (mode == kDeviceToHost) {
+ kind = cudaMemcpyDeviceToHost;
+ } else if (mode == kDeviceToDevice) {
+ kind = cudaMemcpyDeviceToDevice;
+ } else {
+ Terminator terminator{sourceFile, sourceLine};
+ terminator.Crash("host to host copy not supported");
+ }
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
+}
+
void RTDEF(CUFDataTransferDescPtr)(const Descriptor &desc, void *addr,
std::size_t bytes, unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
diff --git a/flang/unittests/Runtime/CUDA/CMakeLists.txt b/flang/unittests/Runtime/CUDA/CMakeLists.txt
index 30fb8c220233c0..a7fe604d687bdd 100644
--- a/flang/unittests/Runtime/CUDA/CMakeLists.txt
+++ b/flang/unittests/Runtime/CUDA/CMakeLists.txt
@@ -3,6 +3,7 @@ if (FLANG_CUF_RUNTIME)
add_flang_unittest(FlangCufRuntimeTests
Allocatable.cpp
AllocatorCUF.cpp
+ Memory.cpp
)
if (BUILD_SHARED_LIBS)
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
new file mode 100644
index 00000000000000..95ae6c75e3eeeb
--- /dev/null
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -0,0 +1,30 @@
+//===-- flang/unittests/Runtime/Memory.cpp -----------------------*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Runtime/CUDA/memory.h"
+#include "gtest/gtest.h"
+#include "../../../runtime/terminator.h"
+#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/common.h"
+
+#include "cuda_runtime.h"
+
+using namespace Fortran::runtime::cuda;
+
+TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
+ int *dev = (int *)RTNAME(CUFMemAlloc)(sizeof(int), __FILE__, __LINE__);
+ EXPECT_TRUE(dev != 0);
+ int host = 42;
+ RTNAME(CUFDataTransferPtrPtr)((void *)dev, (void *)&host, sizeof(int),
+ kHostToDevice, __FILE__, __LINE__);
+ host = 0;
+ RTNAME(CUFDataTransferPtrPtr)((void *)&host, (void *)dev, sizeof(int),
+ kDeviceToHost, __FILE__, __LINE__);
+ EXPECT_EQ(42, host);
+ RTNAME(CUFMemFree)((void *)dev, __FILE__, __LINE__);
+}
>From 1e350fc7fa9fa646439c656de5e5f5de34abf456 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 14:23:24 -0700
Subject: [PATCH 2/5] Add comment
---
flang/runtime/CUDA/memory.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 171203a93be2a9..6ac7607c6fb179 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -47,6 +47,7 @@ void RTDEF(CUFDataTransferPtrPtr)(void *dst, void *src, std::size_t bytes,
Terminator terminator{sourceFile, sourceLine};
terminator.Crash("host to host copy not supported");
}
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, bytes, kind));
}
>From 20f1abbdc224d509b7abac77095ba57b76ad84dd Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 14:28:44 -0700
Subject: [PATCH 3/5] clang-format
---
flang/unittests/Runtime/CUDA/Memory.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 95ae6c75e3eeeb..08fb22ef2b74a3 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -20,11 +20,11 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
int *dev = (int *)RTNAME(CUFMemAlloc)(sizeof(int), __FILE__, __LINE__);
EXPECT_TRUE(dev != 0);
int host = 42;
- RTNAME(CUFDataTransferPtrPtr)((void *)dev, (void *)&host, sizeof(int),
- kHostToDevice, __FILE__, __LINE__);
+ RTNAME(CUFDataTransferPtrPtr)
+ ((void *)dev, (void *)&host, sizeof(int), kHostToDevice, __FILE__, __LINE__);
host = 0;
- RTNAME(CUFDataTransferPtrPtr)((void *)&host, (void *)dev, sizeof(int),
- kDeviceToHost, __FILE__, __LINE__);
+ RTNAME(CUFDataTransferPtrPtr)
+ ((void *)&host, (void *)dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__);
EXPECT_EQ(42, host);
RTNAME(CUFMemFree)((void *)dev, __FILE__, __LINE__);
}
>From 3ea06ed86ddaed8832fd61d7ebf3c80f8806d4e2 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 14:55:50 -0700
Subject: [PATCH 4/5] Add memory type to alloc/free
---
flang/include/flang/Runtime/CUDA/common.h | 7 ++++++
flang/include/flang/Runtime/CUDA/memory.h | 8 +++---
flang/runtime/CUDA/memory.cpp | 30 +++++++++++++++++++----
flang/unittests/Runtime/CUDA/Memory.cpp | 5 ++--
4 files changed, 39 insertions(+), 11 deletions(-)
diff --git a/flang/include/flang/Runtime/CUDA/common.h b/flang/include/flang/Runtime/CUDA/common.h
index cb8681da161f0d..b73bc390ea8c9e 100644
--- a/flang/include/flang/Runtime/CUDA/common.h
+++ b/flang/include/flang/Runtime/CUDA/common.h
@@ -12,6 +12,13 @@
#include "flang/Runtime/descriptor.h"
#include "flang/Runtime/entry-names.h"
+/// Type of memory for allocation/deallocation
+static constexpr unsigned kMemTypeDevice = 0;
+static constexpr unsigned kMemTypeManaged = 1;
+static constexpr unsigned kMemTypeUnified = 2;
+static constexpr unsigned kMemTypePinned = 3;
+
+/// Data transfer kinds.
static constexpr unsigned kHostToDevice = 0;
static constexpr unsigned kDeviceToHost = 1;
static constexpr unsigned kDeviceToDevice = 2;
diff --git a/flang/include/flang/Runtime/CUDA/memory.h b/flang/include/flang/Runtime/CUDA/memory.h
index 2fc28ed1567b89..3c3ae73d4ad7a1 100644
--- a/flang/include/flang/Runtime/CUDA/memory.h
+++ b/flang/include/flang/Runtime/CUDA/memory.h
@@ -18,12 +18,12 @@ namespace Fortran::runtime::cuda {
extern "C" {
/// Allocate memory on the device.
-void *RTDECL(CUFMemAlloc)(
- std::size_t bytes, const char *sourceFile = nullptr, int sourceLine = 0);
+void *RTDECL(CUFMemAlloc)(std::size_t bytes, unsigned type,
+ const char *sourceFile = nullptr, int sourceLine = 0);
/// Free memory allocated on the device.
-void RTDECL(CUFMemFree)(
- void *devicePtr, const char *sourceFile = nullptr, int sourceLine = 0);
+void RTDECL(CUFMemFree)(void *devicePtr, unsigned type,
+ const char *sourceFile = nullptr, int sourceLine = 0);
/// Set value to the data hold by a descriptor. The \p value pointer must be
/// addressable to the same amount of bytes specified by the element size of
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 6ac7607c6fb179..18501321520827 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -16,15 +16,35 @@ namespace Fortran::runtime::cuda {
extern "C" {
void *RTDEF(CUFMemAlloc)(
- std::size_t bytes, const char *sourceFile, int sourceLine) {
+ std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) {
void *ptr;
- if (bytes != 0)
- CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+ if (bytes != 0) {
+ if (type == kMemTypeDevice) {
+ CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
+ } else if (type == kMemTypeManaged || type == kMemTypeUnified) {
+ CUDA_REPORT_IF_ERROR(
+ cudaMallocManaged((void **)&ptr, bytes, cudaMemAttachGlobal));
+ } else if (type == kMemTypePinned) {
+ CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&ptr, bytes));
+ } else {
+ Terminator terminator{sourceFile, sourceLine};
+ terminator.Crash("unsupported memory type");
+ }
+ }
return ptr;
}
-void RTDEF(CUFMemFree)(void *ptr, const char *sourceFile, int sourceLine) {
- CUDA_REPORT_IF_ERROR(cudaFree(ptr));
+void RTDEF(CUFMemFree)(
+ void *ptr, unsigned type, const char *sourceFile, int sourceLine) {
+ if (type == kMemTypeDevice || type == kMemTypeManaged ||
+ type == kMemTypeUnified) {
+ CUDA_REPORT_IF_ERROR(cudaFree(ptr));
+ } else if (type == kMemTypePinned) {
+ CUDA_REPORT_IF_ERROR(cudaFreeHost(ptr));
+ } else {
+ Terminator terminator{sourceFile, sourceLine};
+ terminator.Crash("unsupported memory type");
+ }
}
void RTDEF(CUFMemsetDescriptor)(const Descriptor &desc, void *value,
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 08fb22ef2b74a3..157d3cdb531def 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -17,7 +17,8 @@
using namespace Fortran::runtime::cuda;
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
- int *dev = (int *)RTNAME(CUFMemAlloc)(sizeof(int), __FILE__, __LINE__);
+ int *dev = (int *)RTNAME(CUFMemAlloc)(
+ sizeof(int), kMemTypeDevice, __FILE__, __LINE__);
EXPECT_TRUE(dev != 0);
int host = 42;
RTNAME(CUFDataTransferPtrPtr)
@@ -26,5 +27,5 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
RTNAME(CUFDataTransferPtrPtr)
((void *)&host, (void *)dev, sizeof(int), kDeviceToHost, __FILE__, __LINE__);
EXPECT_EQ(42, host);
- RTNAME(CUFMemFree)((void *)dev, __FILE__, __LINE__);
+ RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
}
>From 9b6ff397daa8dac9762301c8602c01fbd9e6ef58 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Tue, 24 Sep 2024 15:32:03 -0700
Subject: [PATCH 5/5] Address review comment
---
flang/runtime/CUDA/memory.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index 18501321520827..fc48b4343eea9d 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -17,7 +17,7 @@ extern "C" {
void *RTDEF(CUFMemAlloc)(
std::size_t bytes, unsigned type, const char *sourceFile, int sourceLine) {
- void *ptr;
+ void *ptr = nullptr;
if (bytes != 0) {
if (type == kMemTypeDevice) {
CUDA_REPORT_IF_ERROR(cudaMalloc((void **)&ptr, bytes));
@@ -36,6 +36,8 @@ void *RTDEF(CUFMemAlloc)(
void RTDEF(CUFMemFree)(
void *ptr, unsigned type, const char *sourceFile, int sourceLine) {
+ if (!ptr)
+ return;
if (type == kMemTypeDevice || type == kMemTypeManaged ||
type == kMemTypeUnified) {
CUDA_REPORT_IF_ERROR(cudaFree(ptr));
More information about the flang-commits
mailing list