[flang-commits] [flang] [flang][cuda] Data transfer with descriptor (PR #114598)
via flang-commits
flang-commits at lists.llvm.org
Fri Nov 1 12:35:57 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-runtime
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
Reopen PR #<!-- -->114302 as it was automatically closed.
Review in #<!-- -->114302
---
Full diff: https://github.com/llvm/llvm-project/pull/114598.diff
2 Files Affected:
- (modified) flang/runtime/CUDA/memory.cpp (+32-2)
- (modified) flang/unittests/Runtime/CUDA/Memory.cpp (+40)
``````````diff
diff --git a/flang/runtime/CUDA/memory.cpp b/flang/runtime/CUDA/memory.cpp
index d03f1cc0e48d66..daf1db684a3d2e 100644
--- a/flang/runtime/CUDA/memory.cpp
+++ b/flang/runtime/CUDA/memory.cpp
@@ -9,10 +9,32 @@
#include "flang/Runtime/CUDA/memory.h"
#include "../terminator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/assign.h"
#include "cuda_runtime.h"
namespace Fortran::runtime::cuda {
+static void *MemmoveHostToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyHostToDevice));
+ return dst;
+}
+
+static void *MemmoveDeviceToHost(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToHost));
+ return dst;
+}
+
+static void *MemmoveDeviceToDevice(
+ void *dst, const void *src, std::size_t count) {
+ // TODO: Use cudaMemcpyAsync when we have support for stream.
+ CUDA_REPORT_IF_ERROR(cudaMemcpy(dst, src, count, cudaMemcpyDeviceToDevice));
+ return dst;
+}
+
extern "C" {
void *RTDEF(CUFMemAlloc)(
@@ -90,8 +112,16 @@ void RTDEF(CUFDataTransferPtrDesc)(void *addr, Descriptor *desc,
void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
unsigned mode, const char *sourceFile, int sourceLine) {
Terminator terminator{sourceFile, sourceLine};
- terminator.Crash(
- "not yet implemented: CUDA data transfer between two descriptors");
+ MemmoveFct memmoveFct;
+ if (mode == kHostToDevice) {
+ memmoveFct = &MemmoveHostToDevice;
+ } else if (mode == kDeviceToHost) {
+ memmoveFct = &MemmoveDeviceToHost;
+ } else if (mode == kDeviceToDevice) {
+ memmoveFct = &MemmoveDeviceToDevice;
+ }
+ Fortran::runtime::Assign(
+ *dstDesc, *srcDesc, terminator, MaybeReallocate, memmoveFct);
}
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/unittests/Runtime/CUDA/Memory.cpp b/flang/unittests/Runtime/CUDA/Memory.cpp
index 157d3cdb531def..3492b72aac0919 100644
--- a/flang/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang/unittests/Runtime/CUDA/Memory.cpp
@@ -9,11 +9,17 @@
#include "flang/Runtime/CUDA/memory.h"
#include "gtest/gtest.h"
#include "../../../runtime/terminator.h"
+#include "../tools.h"
#include "flang/Common/Fortran.h"
+#include "flang/Runtime/CUDA/allocator.h"
#include "flang/Runtime/CUDA/common.h"
+#include "flang/Runtime/CUDA/descriptor.h"
+#include "flang/Runtime/allocatable.h"
+#include "flang/Runtime/allocator-registry.h"
#include "cuda_runtime.h"
+using namespace Fortran::runtime;
using namespace Fortran::runtime::cuda;
TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
@@ -29,3 +35,37 @@ TEST(MemoryCUFTest, SimpleAllocTramsferFree) {
EXPECT_EQ(42, host);
RTNAME(CUFMemFree)((void *)dev, kMemTypeDevice, __FILE__, __LINE__);
}
+
+static OwningPtr<Descriptor> createAllocatable(
+ Fortran::common::TypeCategory tc, int kind, int rank = 1) {
+ return Descriptor::Create(TypeCode{tc, kind}, kind, nullptr, rank, nullptr,
+ CFI_attribute_allocatable);
+}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
+ using Fortran::common::TypeCategory;
+ RTNAME(CUFRegisterAllocator)();
+ // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+ auto dev{createAllocatable(TypeCategory::Integer, 4)};
+ dev->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+ RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10);
+ RTNAME(AllocatableAllocate)
+ (*dev, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__);
+ EXPECT_TRUE(dev->IsAllocated());
+
+ // Create temp array to transfer to device.
+ auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+ RTNAME(CUFDataTransferDescDesc)(dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+
+ // Retrieve data from device.
+ auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+ RTNAME(CUFDataTransferDescDesc)
+ (host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
+
+ for (unsigned i = 0; i < 10; ++i) {
+ EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+ }
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/114598
More information about the flang-commits
mailing list