[llvm] [flang][cuda] Allocate the dst descriptor in data transfer (PR #143437)
Valentin Clement バレンタイン クレメン via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 9 13:37:10 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/143437
In a test like:
```
integer, allocatable, device :: da(:)
allocate(a(200))
a = 2
da = a ! da is not allocated before the data transfer is initiated; allocate it using a as the mold.
```
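The reference compiler allocates the data for the `da` descriptor so that the data transfer can be done properly. This patch makes the runtime do the same: if the destination descriptor is not allocated, it is allocated using the source descriptor as a mold before the transfer.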
From 4ff2de6c6b42f582bd3fa00ecee6b132c5764ca5 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 9 Jun 2025 13:34:13 -0700
Subject: [PATCH] [flang][cuda] Allocate the dst descriptor in data transfer
---
flang-rt/lib/cuda/memory.cpp | 5 +++++
flang-rt/unittests/Runtime/CUDA/Memory.cpp | 26 ++++++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/flang-rt/lib/cuda/memory.cpp b/flang-rt/lib/cuda/memory.cpp
index 766f6847946cb..e4e13fa825f3b 100644
--- a/flang-rt/lib/cuda/memory.cpp
+++ b/flang-rt/lib/cuda/memory.cpp
@@ -105,6 +105,11 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
} else {
terminator.Crash("host to host copy not supported");
}
+ // Allocate dst descriptor if not allocated.
+ if (!dstDesc->IsAllocated()) {
+ dstDesc->ApplyMold(*srcDesc, dstDesc->rank());
+ dstDesc->Allocate(/*asyncObject=*/nullptr);
+ }
if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) {
// Special case when rhs is bigger than lhs and both are contiguous arrays.
// In this case we do a simple ptr to ptr transfer with the size of lhs.
diff --git a/flang-rt/unittests/Runtime/CUDA/Memory.cpp b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
index 7915baca6c203..f2e17870f7999 100644
--- a/flang-rt/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
@@ -70,3 +70,29 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
}
}
+
+TEST(MemoryCUFTest, CUFDataTransferDescDescDstNotAllocated) {
+ using Fortran::common::TypeCategory;
+ RTNAME(CUFRegisterAllocator)();
+ // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+ auto dev{createAllocatable(TypeCategory::Integer, 4)};
+ dev->SetAllocIdx(kDeviceAllocatorPos);
+ EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+ EXPECT_FALSE(dev->IsAllocated());
+
+ // Create temp array to transfer to device.
+ auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+ RTNAME(CUFDataTransferDescDesc)
+ (dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+
+ // Retrieve data from device.
+ auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+ std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+ RTNAME(CUFDataTransferDescDesc)
+ (host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
+
+ for (unsigned i = 0; i < 10; ++i) {
+ EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+ }
+}
More information about the llvm-commits mailing list