[flang] [llvm] [flang][cuda] Add CUFSetAssociatedStream entry point (PR #181313)

Valentin Clement バレンタイン クレメン via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 10:59:31 PST 2026


https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/181313

From 03deb53cb502ac5e022bb25858cf722596047225 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 12 Feb 2026 22:15:20 -0800
Subject: [PATCH 1/3] [flang][cuda] Add CUFSetAssociatedStream entry point

---
 flang-rt/lib/cuda/allocator.cpp               | 10 +++++++
 .../unittests/Runtime/CUDA/Allocatable.cpp    | 29 +++++++++++++++++++
 flang/include/flang/Runtime/CUDA/allocator.h  |  1 +
 3 files changed, 40 insertions(+)

diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index dc3ce0ee1b590..7aeabc2e12e1c 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -140,6 +140,16 @@ cudaStream_t RTDECL(CUFGetAssociatedStream)(void *p) {
   }
   return nullptr;
 }
+
+void RTDECL(CUFSetAssociatedStream)(void *p, cudaStream_t stream) {
+  int pos = findAllocation(p);
+  if (pos >= 0) {
+    deviceAllocations[pos].stream = stream;
+  } else {
+    insertAllocation(p, 0, stream);
+  }
+}
+
 }
 
 void *CUFAllocPinned(
diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
index f061c082cc614..38d215fd0c0c5 100644
--- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
@@ -172,3 +172,32 @@ TEST(AllocatableAsyncTest, StreamDeviceAllocatable) {
   cudaStream_t empty = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
   EXPECT_EQ(empty, nullptr);
 }
+
+TEST(AllocatableAsyncTest, SetStreamTest) {
+    using Fortran::common::TypeCategory;
+    RTNAME(CUFRegisterAllocator)();
+    // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+    auto a{createAllocatable(TypeCategory::Real, 4)};
+    a->SetAllocIdx(kDeviceAllocatorPos);
+    EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
+    EXPECT_FALSE(a->HasAddendum());
+    RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+  
+    cudaStream_t stream;
+    cudaStreamCreate(&stream);
+    EXPECT_EQ(cudaSuccess, cudaGetLastError());
+  
+    RTNAME(AllocatableAllocate)
+    (*a, /*asyncObject=*/nullptr, /*hasStat=*/false,
+        /*errMsg=*/nullptr, __FILE__, __LINE__);
+    EXPECT_TRUE(a->IsAllocated());
+    cudaDeviceSynchronize();
+    EXPECT_EQ(cudaSuccess, cudaGetLastError());
+    cudaStream_t defaultStream = 0;
+    cudaStream_t s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
+    EXPECT_EQ(s, defaultStream);
+
+    RTDECL(CUFSetAssociatedStream)(a->raw().base_addr, stream);
+    s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
+    EXPECT_EQ(s, stream);
+  }
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 56176360296a9..e87fe2799af1e 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -21,6 +21,7 @@ extern "C" {
 
 void RTDECL(CUFRegisterAllocator)();
 cudaStream_t RTDECL(CUFGetAssociatedStream)(void *);
+void RTDECL(CUFSetAssociatedStream)(void *, cudaStream_t);
 }
 
 void *CUFAllocPinned(std::size_t, std::int64_t *);

From 36565a57486f9b1ea261cd92e8bbd3f8661801d9 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Thu, 12 Feb 2026 22:18:32 -0800
Subject: [PATCH 2/3] clang-format

---
 flang-rt/lib/cuda/allocator.cpp               |  1 -
 .../unittests/Runtime/CUDA/Allocatable.cpp    | 54 +++++++++----------
 2 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 7aeabc2e12e1c..48ffce6a4a4a3 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -149,7 +149,6 @@ void RTDECL(CUFSetAssociatedStream)(void *p, cudaStream_t stream) {
     insertAllocation(p, 0, stream);
   }
 }
-
 }
 
 void *CUFAllocPinned(
diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
index 38d215fd0c0c5..fc263db805b4c 100644
--- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
@@ -174,30 +174,30 @@ TEST(AllocatableAsyncTest, StreamDeviceAllocatable) {
 }
 
 TEST(AllocatableAsyncTest, SetStreamTest) {
-    using Fortran::common::TypeCategory;
-    RTNAME(CUFRegisterAllocator)();
-    // REAL(4), DEVICE, ALLOCATABLE :: a(:)
-    auto a{createAllocatable(TypeCategory::Real, 4)};
-    a->SetAllocIdx(kDeviceAllocatorPos);
-    EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
-    EXPECT_FALSE(a->HasAddendum());
-    RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
-  
-    cudaStream_t stream;
-    cudaStreamCreate(&stream);
-    EXPECT_EQ(cudaSuccess, cudaGetLastError());
-  
-    RTNAME(AllocatableAllocate)
-    (*a, /*asyncObject=*/nullptr, /*hasStat=*/false,
-        /*errMsg=*/nullptr, __FILE__, __LINE__);
-    EXPECT_TRUE(a->IsAllocated());
-    cudaDeviceSynchronize();
-    EXPECT_EQ(cudaSuccess, cudaGetLastError());
-    cudaStream_t defaultStream = 0;
-    cudaStream_t s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
-    EXPECT_EQ(s, defaultStream);
-
-    RTDECL(CUFSetAssociatedStream)(a->raw().base_addr, stream);
-    s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
-    EXPECT_EQ(s, stream);
-  }
+  using Fortran::common::TypeCategory;
+  RTNAME(CUFRegisterAllocator)();
+  // REAL(4), DEVICE, ALLOCATABLE :: a(:)
+  auto a{createAllocatable(TypeCategory::Real, 4)};
+  a->SetAllocIdx(kDeviceAllocatorPos);
+  EXPECT_EQ((int)kDeviceAllocatorPos, a->GetAllocIdx());
+  EXPECT_FALSE(a->HasAddendum());
+  RTNAME(AllocatableSetBounds)(*a, 0, 1, 10);
+
+  cudaStream_t stream;
+  cudaStreamCreate(&stream);
+  EXPECT_EQ(cudaSuccess, cudaGetLastError());
+
+  RTNAME(AllocatableAllocate)
+  (*a, /*asyncObject=*/nullptr, /*hasStat=*/false,
+      /*errMsg=*/nullptr, __FILE__, __LINE__);
+  EXPECT_TRUE(a->IsAllocated());
+  cudaDeviceSynchronize();
+  EXPECT_EQ(cudaSuccess, cudaGetLastError());
+  cudaStream_t defaultStream = 0;
+  cudaStream_t s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
+  EXPECT_EQ(s, defaultStream);
+
+  RTDECL(CUFSetAssociatedStream)(a->raw().base_addr, stream);
+  s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
+  EXPECT_EQ(s, stream);
+}

From 71bf9bc1d3c27992b7f5bc6370a3ab62b8bba89c Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 13 Feb 2026 10:58:53 -0800
Subject: [PATCH 3/3] Update for stat

---
 flang-rt/lib/cuda/allocator.cpp                 |  8 +++++++-
 flang-rt/unittests/Runtime/CUDA/Allocatable.cpp | 10 +++++++++-
 flang/include/flang/Runtime/CUDA/allocator.h    |  4 +++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 48ffce6a4a4a3..917b279b38f3c 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -141,13 +141,19 @@ cudaStream_t RTDECL(CUFGetAssociatedStream)(void *p) {
   return nullptr;
 }
 
-void RTDECL(CUFSetAssociatedStream)(void *p, cudaStream_t stream) {
+int RTDECL(CUFSetAssociatedStream)(void *p, cudaStream_t stream, bool hasStat,
+    const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
+  Terminator terminator{sourceFile, sourceLine};
+  if (p == nullptr) {
+    return ReturnError(terminator, StatBaseNull, errMsg, hasStat);
+  }
   int pos = findAllocation(p);
   if (pos >= 0) {
     deviceAllocations[pos].stream = stream;
   } else {
     insertAllocation(p, 0, stream);
   }
+  return StatOk;
 }
 }
 
diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
index fc263db805b4c..e308e8c8bdadb 100644
--- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
@@ -11,6 +11,7 @@
 #include "gtest/gtest.h"
 #include "flang-rt/runtime/allocator-registry.h"
 #include "flang-rt/runtime/descriptor.h"
+#include "flang-rt/runtime/stat.h"
 #include "flang-rt/runtime/terminator.h"
 #include "flang/Runtime/CUDA/allocator.h"
 #include "flang/Runtime/CUDA/common.h"
@@ -197,7 +198,14 @@ TEST(AllocatableAsyncTest, SetStreamTest) {
   cudaStream_t s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
   EXPECT_EQ(s, defaultStream);
 
-  RTDECL(CUFSetAssociatedStream)(a->raw().base_addr, stream);
+  int stat1 = RTDECL(CUFSetAssociatedStream)(a->raw().base_addr, stream);
+  EXPECT_EQ(stat1, StatOk);
   s = RTDECL(CUFGetAssociatedStream)(a->raw().base_addr);
   EXPECT_EQ(s, stream);
+
+  // REAL(4), DEVICE, ALLOCATABLE :: b(:) - unallocated, base_addr is null
+  auto b{createAllocatable(TypeCategory::Real, 4)};
+  int stat2 = RTDECL(CUFSetAssociatedStream)(
+      b->raw().base_addr, stream, true, nullptr, __FILE__, __LINE__);
+  EXPECT_EQ(stat2, StatBaseNull);
 }
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index e87fe2799af1e..6a64bdeccbc2c 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -21,7 +21,9 @@ extern "C" {
 
 void RTDECL(CUFRegisterAllocator)();
 cudaStream_t RTDECL(CUFGetAssociatedStream)(void *);
-void RTDECL(CUFSetAssociatedStream)(void *, cudaStream_t);
+int RTDECL(CUFSetAssociatedStream)(void *, cudaStream_t, bool hasStat = false,
+    const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
+    int sourceLine = 0);
 }
 
 void *CUFAllocPinned(std::size_t, std::int64_t *);



More information about the llvm-commits mailing list