[llvm] [flang][cuda] Allocate the dst descriptor in data transfer (PR #143437)

Valentin Clement バレンタイン クレメン via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 9 13:37:10 PDT 2025


https://github.com/clementval created https://github.com/llvm/llvm-project/pull/143437

In a test like: 

```
integer, allocatable, device :: da(:)
allocate(a(200))
a = 2
da = a ! da is not allocated before the data transfer is initiated, so allocate it using the shape of a
```

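The reference compiler allocates the data for the `da` descriptor so that the data transfer can be performed properly. This patch makes the runtime do the same: when the destination descriptor is not allocated, it is allocated using the source descriptor as a mold before the transfer.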

>From 4ff2de6c6b42f582bd3fa00ecee6b132c5764ca5 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 9 Jun 2025 13:34:13 -0700
Subject: [PATCH] [flang][cuda] Allocate the dst descriptor in data transfer

---
 flang-rt/lib/cuda/memory.cpp               |  5 +++++
 flang-rt/unittests/Runtime/CUDA/Memory.cpp | 26 ++++++++++++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/flang-rt/lib/cuda/memory.cpp b/flang-rt/lib/cuda/memory.cpp
index 766f6847946cb..e4e13fa825f3b 100644
--- a/flang-rt/lib/cuda/memory.cpp
+++ b/flang-rt/lib/cuda/memory.cpp
@@ -105,6 +105,11 @@ void RTDECL(CUFDataTransferDescDesc)(Descriptor *dstDesc, Descriptor *srcDesc,
   } else {
     terminator.Crash("host to host copy not supported");
   }
+  // Allocate dst descriptor if not allocated.
+  if (!dstDesc->IsAllocated()) {
+    dstDesc->ApplyMold(*srcDesc, dstDesc->rank());
+    dstDesc->Allocate(/*asyncObject=*/nullptr);
+  }
   if ((srcDesc->rank() > 0) && (dstDesc->Elements() < srcDesc->Elements())) {
     // Special case when rhs is bigger than lhs and both are contiguous arrays.
     // In this case we do a simple ptr to ptr transfer with the size of lhs.
diff --git a/flang-rt/unittests/Runtime/CUDA/Memory.cpp b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
index 7915baca6c203..f2e17870f7999 100644
--- a/flang-rt/unittests/Runtime/CUDA/Memory.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Memory.cpp
@@ -70,3 +70,29 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) {
     EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
   }
 }
+
+TEST(MemoryCUFTest, CUFDataTransferDescDescDstNotAllocated) {
+  using Fortran::common::TypeCategory;
+  RTNAME(CUFRegisterAllocator)();
+  // INTEGER(4), DEVICE, ALLOCATABLE :: a(:)
+  auto dev{createAllocatable(TypeCategory::Integer, 4)};
+  dev->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx());
+  EXPECT_FALSE(dev->IsAllocated());
+
+  // Create temp array to transfer to device.
+  auto x{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9})};
+  RTNAME(CUFDataTransferDescDesc)
+  (dev.get(), x.get(), kHostToDevice, __FILE__, __LINE__);
+
+  // Retrieve data from device.
+  auto host{MakeArray<TypeCategory::Integer, 4>(std::vector<int>{10},
+      std::vector<int32_t>{0, 0, 0, 0, 0, 0, 0, 0, 0, 0})};
+  RTNAME(CUFDataTransferDescDesc)
+  (host.get(), dev.get(), kDeviceToHost, __FILE__, __LINE__);
+
+  for (unsigned i = 0; i < 10; ++i) {
+    EXPECT_EQ(*host->ZeroBasedIndexedElement<std::int32_t>(i), (std::int32_t)i);
+  }
+}



More information about the llvm-commits mailing list