[flang-commits] [flang] 7772a45 - [flang][cuda] Add entry points for cudastreamsynchronize (#181932)
via flang-commits
flang-commits at lists.llvm.org
Wed Feb 18 15:54:58 PST 2026
Author: Valentin Clement (バレンタイン クレメン)
Date: 2026-02-18T15:54:54-08:00
New Revision: 7772a45b1a25896853ad67ee9c58844e3d4d6ef4
URL: https://github.com/llvm/llvm-project/commit/7772a45b1a25896853ad67ee9c58844e3d4d6ef4
DIFF: https://github.com/llvm/llvm-project/commit/7772a45b1a25896853ad67ee9c58844e3d4d6ef4.diff
LOG: [flang][cuda] Add entry points for cudastreamsynchronize (#181932)
Added:
Modified:
flang-rt/lib/cuda/stream.cpp
flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
flang/include/flang/Runtime/CUDA/stream.h
flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
flang/module/cuda_runtime_api.f90
flang/test/Lower/CUDA/cuda-default-stream.cuf
Removed:
################################################################################
diff --git a/flang-rt/lib/cuda/stream.cpp b/flang-rt/lib/cuda/stream.cpp
index af9e63f6a85b1..217aa9501e510 100644
--- a/flang-rt/lib/cuda/stream.cpp
+++ b/flang-rt/lib/cuda/stream.cpp
@@ -29,6 +29,15 @@ int RTDECL(CUFSetDefaultStream)(cudaStream_t stream) {
}
cudaStream_t RTDECL(CUFGetDefaultStream)() { return defaultStream; }
+
+int RTDECL(CUFStreamSynchronize)(cudaStream_t stream) {
+ return cudaStreamSynchronize(stream);
+}
+
+int RTDECL(CUFStreamSynchronizeNull)() {
+ cudaStream_t defaultStream = 0;
+ return cudaStreamSynchronize(defaultStream);
+}
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
index 3e23a4dfa0203..5e8fbcd1b93ac 100644
--- a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
@@ -61,6 +61,10 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genCUDAGetDefaultStreamNull(mlir::Type,
llvm::ArrayRef<mlir::Value>);
+ fir::ExtendedValue
+ genCUDAStreamSynchronize(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
+ mlir::Value genCUDAStreamSynchronizeNull(mlir::Type,
+ llvm::ArrayRef<mlir::Value>);
void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
template <const char *fctName, int extent>
fir::ExtendedValue genLDXXFunc(mlir::Type,
diff --git a/flang/include/flang/Runtime/CUDA/stream.h b/flang/include/flang/Runtime/CUDA/stream.h
index ce97d0d067217..686b828d4e145 100644
--- a/flang/include/flang/Runtime/CUDA/stream.h
+++ b/flang/include/flang/Runtime/CUDA/stream.h
@@ -21,6 +21,8 @@ extern "C" {
int RTDECL(CUFSetDefaultStream)(cudaStream_t);
cudaStream_t RTDECL(CUFGetDefaultStream)();
+int RTDECL(CUFStreamSynchronize)(cudaStream_t);
+int RTDECL(CUFStreamSynchronizeNull)();
}
} // namespace Fortran::runtime::cuda
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index bbc353634cd42..e5b07aa3861a2 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -403,6 +403,16 @@ static constexpr IntrinsicHandler cudaHandlers[]{
&CI::genCUDASetDefaultStream),
{{{"stream", asValue}}},
/*isElemental=*/false},
+ {"cudastreamsynchronize",
+ static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
+ &CI::genCUDAStreamSynchronize),
+ {{{"stream", asValue}}},
+ /*isElemental=*/false},
+ {"cudastreamsynchronizenull",
+ static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(
+ &CI::genCUDAStreamSynchronizeNull),
+ {},
+ /*isElemental=*/false},
{"fence_proxy_async",
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(
&CI::genFenceProxyAsync),
@@ -1161,6 +1171,32 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genCUDASetDefaultStreamArray(
return call.getResult(0);
}
+// CUDASTREAMSYNCHRONIZE
+fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAStreamSynchronize(
+ mlir::Type resTy, llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 1);
+ mlir::Value stream = fir::getBase(args[0]);
+ mlir::Type i64Ty = builder.getI64Type();
+ auto ctx = builder.getContext();
+ mlir::FunctionType ftype = mlir::FunctionType::get(ctx, {i64Ty}, {resTy});
+ auto funcOp =
+ builder.createFunction(loc, RTNAME_STRING(CUFStreamSynchronize), ftype);
+ auto call = fir::CallOp::create(builder, loc, funcOp, {stream});
+ return call.getResult(0);
+}
+
+// CUDASTREAMSYNCHRONIZENULL
+mlir::Value CUDAIntrinsicLibrary::genCUDAStreamSynchronizeNull(
+ mlir::Type resTy, llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 0);
+ auto ctx = builder.getContext();
+ mlir::FunctionType ftype = mlir::FunctionType::get(ctx, {}, {resTy});
+ auto funcOp = builder.createFunction(
+ loc, RTNAME_STRING(CUFStreamSynchronizeNull), ftype);
+ auto call = fir::CallOp::create(builder, loc, funcOp, {});
+ return call.getResult(0);
+}
+
// CUDAGETDEFAULTSTREAMARG
fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAGetDefaultStreamArg(
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/module/cuda_runtime_api.f90 b/flang/module/cuda_runtime_api.f90
index 7c6968cabc373..437bb53d8b27d 100644
--- a/flang/module/cuda_runtime_api.f90
+++ b/flang/module/cuda_runtime_api.f90
@@ -36,4 +36,14 @@ integer function cudasetstreamarray(devptr, stream)
end function
end interface
+interface cudaStreamSynchronize
+ integer function cudastreamsynchronize(stream)
+ import cuda_stream_kind
+ !DIR$ IGNORE_TKR (K) stream
+ integer(kind=cuda_stream_kind), value :: stream
+ end function
+ integer function cudastreamsynchronizenull()
+ end function
+end interface
+
end module cuda_runtime_api
diff --git a/flang/test/Lower/CUDA/cuda-default-stream.cuf b/flang/test/Lower/CUDA/cuda-default-stream.cuf
index beacb409f44f6..5bc78af22e84b 100644
--- a/flang/test/Lower/CUDA/cuda-default-stream.cuf
+++ b/flang/test/Lower/CUDA/cuda-default-stream.cuf
@@ -38,3 +38,15 @@ end subroutine
! CHECK: %{{.*}} = fir.call @_FortranACUFSetDefaultStream(%{{.*}}) fastmath<contract> : (i64) -> i32
! CHECK: %{{.*}} = fir.call @_FortranACUFGetDefaultStream() fastmath<contract> : () -> i64
! CHECK: %{{.*}} = fir.call @_FortranACUFGetDefaultStream() fastmath<contract> : () -> i64
+
+subroutine stream_synchronize
+ use cuda_runtime_api
+ integer(kind=cuda_stream_kind) :: strm
+ integer :: istat
+ istat = cudastreamsynchronize(strm)
+ istat = cudastreamsynchronize()
+end subroutine
+
+! CHECK-LABEL: func.func @_QPstream_synchronize()
+! CHECK: %{{.*}} = fir.call @_FortranACUFStreamSynchronize(%{{.*}}) fastmath<contract> : (i64) -> i32
+! CHECK: %{{.*}} = fir.call @_FortranACUFStreamSynchronizeNull() fastmath<contract> : () -> i32
More information about the flang-commits
mailing list