[flang] [llvm] [flang][cuda] Lower set/get default stream for arrays (PR #181432)
Valentin Clement バレンタイン クレメン via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 15:24:43 PST 2026
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/181432
>From 5b93b1ea263971004ae5431b5d1b8de007d0b9fd Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 13 Feb 2026 13:58:48 -0800
Subject: [PATCH 1/5] [flang][cuda] Lower set/get default stream for arrays
---
flang-rt/lib/cuda/allocator.cpp | 6 +--
.../unittests/Runtime/CUDA/Allocatable.cpp | 3 +-
.../Optimizer/Builder/CUDAIntrinsicCall.h | 6 +++
flang/include/flang/Runtime/CUDA/allocator.h | 4 +-
.../Optimizer/Builder/CUDAIntrinsicCall.cpp | 52 +++++++++++++++++++
flang/module/cuda_runtime_api.f90 | 39 ++++++++++++++
flang/test/Lower/CUDA/cuda-default-stream.cuf | 24 +++++++++
flang/tools/f18/CMakeLists.txt | 4 +-
8 files changed, 128 insertions(+), 10 deletions(-)
create mode 100644 flang/module/cuda_runtime_api.f90
create mode 100644 flang/test/Lower/CUDA/cuda-default-stream.cuf
diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp
index 917b279b38f3c..df7e43de00c70 100644
--- a/flang-rt/lib/cuda/allocator.cpp
+++ b/flang-rt/lib/cuda/allocator.cpp
@@ -141,11 +141,9 @@ cudaStream_t RTDECL(CUFGetAssociatedStream)(void *p) {
return nullptr;
}
-int RTDECL(CUFSetAssociatedStream)(void *p, cudaStream_t stream, bool hasStat,
- const Descriptor *errMsg, const char *sourceFile, int sourceLine) {
- Terminator terminator{sourceFile, sourceLine};
+int RTDECL(CUFSetAssociatedStream)(void *p, cudaStream_t stream) {
if (p == nullptr) {
- return ReturnError(terminator, StatBaseNull, errMsg, hasStat);
+ return StatBaseNull;
}
int pos = findAllocation(p);
if (pos >= 0) {
diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
index e308e8c8bdadb..0680c0086ea73 100644
--- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
+++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp
@@ -205,7 +205,6 @@ TEST(AllocatableAsyncTest, SetStreamTest) {
// REAL(4), DEVICE, ALLOCATABLE :: b(:) - unallocated, base_addr is null
auto b{createAllocatable(TypeCategory::Real, 4)};
- int stat2 = RTDECL(CUFSetAssociatedStream)(
- b->raw().base_addr, stream, true, nullptr, __FILE__, __LINE__);
+ int stat2 = RTDECL(CUFSetAssociatedStream)(b->raw().base_addr, stream);
EXPECT_EQ(stat2, StatBaseNull);
}
diff --git a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
index e9b6e5cf23933..46b6087f899ca 100644
--- a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
@@ -51,6 +51,12 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genClusterBlockIndex(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genClusterDimBlocks(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ fir::ExtendedValue
+ genCUDAForSetDefaultStreamArray(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue>);
+ fir::ExtendedValue
+ genCUDAGetDefaultStreamArg(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue>);
void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
template <const char *fctName, int extent>
fir::ExtendedValue genLDXXFunc(mlir::Type,
diff --git a/flang/include/flang/Runtime/CUDA/allocator.h b/flang/include/flang/Runtime/CUDA/allocator.h
index 6a64bdeccbc2c..1493383a559c0 100644
--- a/flang/include/flang/Runtime/CUDA/allocator.h
+++ b/flang/include/flang/Runtime/CUDA/allocator.h
@@ -21,9 +21,7 @@ extern "C" {
void RTDECL(CUFRegisterAllocator)();
cudaStream_t RTDECL(CUFGetAssociatedStream)(void *);
-int RTDECL(CUFSetAssociatedStream)(void *, cudaStream_t, bool hasStat = false,
- const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr,
- int sourceLine = 0);
+int RTDECL(CUFSetAssociatedStream)(void *, cudaStream_t);
}
void *CUFAllocPinned(std::size_t, std::int64_t *);
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index fe2db4607f86b..63b34016bbb3b 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -19,6 +19,7 @@
#include "flang/Optimizer/Builder/MutableBox.h"
#include "flang/Optimizer/Dialect/CUF/CUFOps.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
+#include "flang/Runtime/entry-names.h"
#include "mlir/Dialect/Index/IR/IndexOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
@@ -382,6 +383,16 @@ static constexpr IntrinsicHandler cudaHandlers[]{
&CI::genClusterDimBlocks),
{},
/*isElemental=*/false},
+ {"cudasetstreamarray",
+ static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
+ &CI::genCUDAForSetDefaultStreamArray),
+ {{{"devptr", asAddr}, {"stream", asValue}}},
+ /*isElemental=*/false},
+ {"cudastreamgetdefaultarg",
+ static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
+ &CI::genCUDAGetDefaultStreamArg),
+ {{{"devptr", asAddr}}},
+ /*isElemental=*/false},
{"fence_proxy_async",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
&CI::genFenceProxyAsync),
@@ -1103,6 +1114,47 @@ CUDAIntrinsicLibrary::genClusterDimBlocks(mlir::Type resultType,
return res;
}
+// CUDAFORSETSTREAMARRAY
+fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAForSetDefaultStreamArray(
+ mlir::Type resTy, llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 2);
+ mlir::Value arg = fir::getBase(args[0]);
+ mlir::Value stream = fir::getBase(args[1]);
+
+ if (mlir::isa<fir::BaseBoxType>(arg.getType()))
+ arg = fir::BoxAddrOp::create(builder, loc, arg);
+ mlir::Type i64Ty = builder.getI64Type();
+ mlir::Type i32Ty = builder.getI32Type();
+ auto ctx = builder.getContext();
+ mlir::Type voidPtrTy =
+ fir::LLVMPointerType::get(ctx, mlir::IntegerType::get(ctx, 8));
+ mlir::FunctionType ftype =
+ mlir::FunctionType::get(ctx, {voidPtrTy, i64Ty}, {i32Ty});
+ mlir::Value voidPtr = builder.createConvert(loc, voidPtrTy, arg);
+ auto funcOp =
+ builder.createFunction(loc, RTNAME_STRING(CUFSetAssociatedStream), ftype);
+ auto call = fir::CallOp::create(builder, loc, funcOp, {voidPtr, stream});
+ return call.getResult(0);
+}
+
+// CUDAGETDEFAULTSTREAMARG
+fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAGetDefaultStreamArg(
+ mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1);
+ mlir::Value devptr = fir::getBase(args[0]);
+ mlir::Type i64Ty = builder.getI64Type();
+ mlir::Type i32Ty = builder.getI32Type();
+ auto ctx = builder.getContext();
+ mlir::Type voidPtrTy =
+ fir::LLVMPointerType::get(ctx, mlir::IntegerType::get(ctx, 8));
+ mlir::FunctionType ftype = mlir::FunctionType::get(ctx, {voidPtrTy}, {i64Ty});
+ mlir::Value voidPtr = builder.createConvert(loc, voidPtrTy, devptr);
+ auto funcOp =
+ builder.createFunction(loc, RTNAME_STRING(CUFGetAssociatedStream), ftype);
+ auto call = fir::CallOp::create(builder, loc, funcOp, {voidPtr});
+ return call.getResult(0);
+}
+
// FENCE_PROXY_ASYNC
void CUDAIntrinsicLibrary::genFenceProxyAsync(
llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/module/cuda_runtime_api.f90 b/flang/module/cuda_runtime_api.f90
new file mode 100644
index 0000000000000..b02ae5a49a0e6
--- /dev/null
+++ b/flang/module/cuda_runtime_api.f90
@@ -0,0 +1,39 @@
+!===-- module/cuda_runtime_api.f90 -----------------------------------------===!
+!
+! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+! See https://llvm.org/LICENSE.txt for license information.
+! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+!
+!===------------------------------------------------------------------------===!
+
+module cuda_runtime_api
+implicit none
+
+integer, parameter :: cuda_stream_kind = int_ptr_kind()
+
+interface cudaforgetdefaultstream
+ integer(kind=cuda_stream_kind) function cudastreamgetdefaultarg(devptr)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (TKR) devptr
+ integer, device :: devptr(*)
+ end function
+ integer(kind=cuda_stream_kind) function cudastreamgetdefaultnull()
+ import cuda_stream_kind
+ end function
+end interface
+
+interface cudaforsetdefaultstream
+ integer function cudasetdefaultstream(stream)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (K) stream
+ integer(kind=cuda_stream_kind), value :: stream
+ end function
+ integer function cudasetstreamarray(devptr, stream)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (K) stream, (TKR) devptr
+ integer, device :: devptr(*)
+ integer(kind=cuda_stream_kind), value :: stream
+ end function
+end interface
+
+end module cuda_runtime_api
diff --git a/flang/test/Lower/CUDA/cuda-default-stream.cuf b/flang/test/Lower/CUDA/cuda-default-stream.cuf
new file mode 100644
index 0000000000000..59c6bc6b70612
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-default-stream.cuf
@@ -0,0 +1,24 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+subroutine associated_stream
+ use cuda_runtime_api
+ integer(kind=cuda_stream_kind) :: strm, strmout
+ integer, managed, allocatable :: v(:)
+ integer :: istat
+
+ istat = cudaforSetDefaultStream(v, strm)
+ strmout = cudaforGetDefaultStream(v)
+
+end subroutine
+
+! CHECK-LABEL: func.func @_QPassociated_stream()
+! CHECK: %[[ADDR:.*]] = fir.box_addr %{{.*}} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[STREAM:.*]] = fir.load %{{.*}}#0 : !fir.ref<i64>
+! CHECK: %[[VOIDPTR:.*]] = fir.convert %[[ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.llvm_ptr<i8>
+! CHECK: %[[STAT:.*]] = fir.call @_FortranACUFSetAssociatedStream(%[[VOIDPTR]], %[[STREAM]]) fastmath<contract> : (!fir.llvm_ptr<i8>, i64) -> i32
+! CHECK: hlfir.assign %[[STAT]] to %{{.*}}#0 : i32, !fir.ref<i32>
+
+! CHECK: %[[ADDR:.*]] = fir.box_addr %{{.*}} : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: %[[VOIDPTR:.*]] = fir.convert %[[ADDR]] : (!fir.heap<!fir.array<?xi32>>) -> !fir.llvm_ptr<i8>
+! CHECK: %[[STREAM:.*]] = fir.call @_FortranACUFGetAssociatedStream(%[[VOIDPTR]]) fastmath<contract> : (!fir.llvm_ptr<i8>) -> i64
+! CHECK: hlfir.assign %[[STREAM]] to %{{.*}}#0 : i64, !fir.ref<i64>
diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index ffd92f033840b..74b329f6c6c03 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -16,6 +16,7 @@ set(MODULES
"__cuda_builtins"
"__cuda_device"
"cooperative_groups"
+ "cuda_runtime_api"
"cudadevice"
"ieee_arithmetic"
"ieee_exceptions"
@@ -64,7 +65,8 @@ if (NOT CMAKE_CROSSCOMPILING)
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
elseif(${filename} STREQUAL "__cuda_device" OR
${filename} STREQUAL "cudadevice" OR
- ${filename} STREQUAL "cooperative_groups")
+ ${filename} STREQUAL "cooperative_groups" OR
+ ${filename} STREQUAL "cuda_runtime_api")
set(opts -fc1 -xcuda)
if(${filename} STREQUAL "__cuda_device")
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_builtins.mod)
>From 932f3e29554dc5d8a83eacfefe5f5de5eecd5dd4 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 13 Feb 2026 14:26:23 -0800
Subject: [PATCH 2/5] Remove unused var
---
flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index 63b34016bbb3b..f2e14b85b28c2 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1143,7 +1143,6 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAGetDefaultStreamArg(
assert(args.size() == 1);
mlir::Value devptr = fir::getBase(args[0]);
mlir::Type i64Ty = builder.getI64Type();
- mlir::Type i32Ty = builder.getI32Type();
auto ctx = builder.getContext();
mlir::Type voidPtrTy =
fir::LLVMPointerType::get(ctx, mlir::IntegerType::get(ctx, 8));
>From af31bd3125491130d7e66f574086eb2cf81ca1e8 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 13 Feb 2026 14:49:37 -0800
Subject: [PATCH 3/5] Fix names
---
.../flang/Optimizer/Builder/CUDAIntrinsicCall.h | 4 ++--
flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp | 14 +++++++-------
flang/module/cuda_runtime_api.f90 | 2 +-
3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
index 46b6087f899ca..d92f0c72dde0d 100644
--- a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
@@ -52,8 +52,8 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
mlir::Value genClusterBlockIndex(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genClusterDimBlocks(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue
- genCUDAForSetDefaultStreamArray(mlir::Type,
- llvm::ArrayRef<fir::ExtendedValue>);
+ genCUDASetDefaultStreamArray(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue
genCUDAGetDefaultStreamArg(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index f2e14b85b28c2..dcbf1531664a0 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -383,16 +383,16 @@ static constexpr IntrinsicHandler cudaHandlers[]{
&CI::genClusterDimBlocks),
{},
/*isElemental=*/false},
- {"cudasetstreamarray",
- static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
- &CI::genCUDAForSetDefaultStreamArray),
- {{{"devptr", asAddr}, {"stream", asValue}}},
- /*isElemental=*/false},
- {"cudastreamgetdefaultarg",
+ {"cudagetstreamdefaultarg",
static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
&CI::genCUDAGetDefaultStreamArg),
{{{"devptr", asAddr}}},
/*isElemental=*/false},
+ {"cudasetstreamarray",
+ static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
+ &CI::genCUDASetDefaultStreamArray),
+ {{{"devptr", asAddr}, {"stream", asValue}}},
+ /*isElemental=*/false},
{"fence_proxy_async",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
&CI::genFenceProxyAsync),
@@ -1115,7 +1115,7 @@ CUDAIntrinsicLibrary::genClusterDimBlocks(mlir::Type resultType,
}
// CUDAFORSETSTREAMARRAY
-fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAForSetDefaultStreamArray(
+fir::ExtendedValue CUDAIntrinsicLibrary::genCUDASetDefaultStreamArray(
mlir::Type resTy, llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
mlir::Value arg = fir::getBase(args[0]);
diff --git a/flang/module/cuda_runtime_api.f90 b/flang/module/cuda_runtime_api.f90
index b02ae5a49a0e6..0ad7ed0ebf5b5 100644
--- a/flang/module/cuda_runtime_api.f90
+++ b/flang/module/cuda_runtime_api.f90
@@ -12,7 +12,7 @@ module cuda_runtime_api
integer, parameter :: cuda_stream_kind = int_ptr_kind()
interface cudaforgetdefaultstream
- integer(kind=cuda_stream_kind) function cudastreamgetdefaultarg(devptr)
+ integer(kind=cuda_stream_kind) function cudagetstreamdefaultarg(devptr)
import cuda_stream_kind
!DIR$ IGNORE_TKR (TKR) devptr
integer, device :: devptr(*)
>From 433f1c44165cece2c050436a70f3e081eb7e147d Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 13 Feb 2026 15:17:39 -0800
Subject: [PATCH 4/5] Typo
---
flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index dcbf1531664a0..4c4403dcd71a9 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1114,7 +1114,7 @@ CUDAIntrinsicLibrary::genClusterDimBlocks(mlir::Type resultType,
return res;
}
-// CUDAFORSETSTREAMARRAY
+// CUDASETSTREAMARRAY
fir::ExtendedValue CUDAIntrinsicLibrary::genCUDASetDefaultStreamArray(
mlir::Type resTy, llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
>From 9bfb19eebb58df96531b4af7f8238d688e090f79 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 13 Feb 2026 15:24:30 -0800
Subject: [PATCH 5/5] Format
---
flang/module/cuda_runtime_api.f90 | 38 +++++++++++++++----------------
1 file changed, 19 insertions(+), 19 deletions(-)
diff --git a/flang/module/cuda_runtime_api.f90 b/flang/module/cuda_runtime_api.f90
index 0ad7ed0ebf5b5..d6cb6d8c0f715 100644
--- a/flang/module/cuda_runtime_api.f90
+++ b/flang/module/cuda_runtime_api.f90
@@ -12,28 +12,28 @@ module cuda_runtime_api
integer, parameter :: cuda_stream_kind = int_ptr_kind()
interface cudaforgetdefaultstream
- integer(kind=cuda_stream_kind) function cudagetstreamdefaultarg(devptr)
- import cuda_stream_kind
- !DIR$ IGNORE_TKR (TKR) devptr
- integer, device :: devptr(*)
- end function
- integer(kind=cuda_stream_kind) function cudastreamgetdefaultnull()
- import cuda_stream_kind
- end function
+ integer(kind=cuda_stream_kind) function cudagetstreamdefaultarg(devptr)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (TKR) devptr
+ integer, device :: devptr(*)
+ end function
+ integer(kind=cuda_stream_kind) function cudastreamgetdefaultnull()
+ import cuda_stream_kind
+ end function
end interface
interface cudaforsetdefaultstream
- integer function cudasetdefaultstream(stream)
- import cuda_stream_kind
- !DIR$ IGNORE_TKR (K) stream
- integer(kind=cuda_stream_kind), value :: stream
- end function
- integer function cudasetstreamarray(devptr, stream)
- import cuda_stream_kind
- !DIR$ IGNORE_TKR (K) stream, (TKR) devptr
- integer, device :: devptr(*)
- integer(kind=cuda_stream_kind), value :: stream
- end function
+ integer function cudasetdefaultstream(stream)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (K) stream
+ integer(kind=cuda_stream_kind), value :: stream
+ end function
+ integer function cudasetstreamarray(devptr, stream)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (K) stream, (TKR) devptr
+ integer, device :: devptr(*)
+ integer(kind=cuda_stream_kind), value :: stream
+ end function
end interface
end module cuda_runtime_api
More information about the llvm-commits
mailing list