[flang-commits] [flang] 4b248f2 - [flang][cuda] Widen stream argument to i64 in stream intrinsic lowering (#196650)
via flang-commits
flang-commits at lists.llvm.org
Fri May 8 15:50:29 PDT 2026
Author: Zhen Wang
Date: 2026-05-08T15:50:24-07:00
New Revision: 4b248f20559b5f208d7ca8d5fb96ddbd84015f76
URL: https://github.com/llvm/llvm-project/commit/4b248f20559b5f208d7ca8d5fb96ddbd84015f76
DIFF: https://github.com/llvm/llvm-project/commit/4b248f20559b5f208d7ca8d5fb96ddbd84015f76.diff
LOG: [flang][cuda] Widen stream argument to i64 in stream intrinsic lowering (#196650)
`genCUDASetDefaultStream` and `genCUDAStreamDestroy` build their runtime
call with an `i64` stream parameter but pass the actual argument
straight through, so a smaller-kind actual (e.g. the literal `0` in
`cudaforSetDefaultStream(0)`) produces an ill-typed `fir.call`:
```
error: 'llvm.call' op operand type mismatch for operand 0: 'i32' != 'i64'
```
Insert a `fir.convert` to `i64` before the call, matching what
`genCUDASetDefaultStreamArray` already does.
Added:
Modified:
flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
flang/test/Lower/CUDA/cuda-default-stream.cuf
Removed:
################################################################################
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index b53294b68ac92..bc95d7d2893a7 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -1135,6 +1135,8 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genCUDASetDefaultStream(
assert(args.size() == 1);
mlir::Value stream = fir::getBase(args[0]);
mlir::Type i64Ty = builder.getI64Type();
+ // Widen to i64 to accept smaller integer-kind actuals (e.g. literal 0).
+ stream = builder.createConvert(loc, i64Ty, stream);
auto ctx = builder.getContext();
mlir::FunctionType ftype = mlir::FunctionType::get(ctx, {i64Ty}, {resTy});
auto funcOp =
@@ -1172,6 +1174,7 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genCUDAStreamDestroy(
assert(args.size() == 1);
mlir::Value stream = fir::getBase(args[0]);
mlir::Type i64Ty = builder.getI64Type();
+ stream = builder.createConvert(loc, i64Ty, stream);
auto ctx = builder.getContext();
mlir::FunctionType ftype = mlir::FunctionType::get(ctx, {i64Ty}, {resTy});
auto funcOp =
diff --git a/flang/test/Lower/CUDA/cuda-default-stream.cuf b/flang/test/Lower/CUDA/cuda-default-stream.cuf
index 5fc7de68b47d4..af09604865431 100644
--- a/flang/test/Lower/CUDA/cuda-default-stream.cuf
+++ b/flang/test/Lower/CUDA/cuda-default-stream.cuf
@@ -49,3 +49,37 @@ end subroutine
! CHECK-LABEL: func.func @_QPstream_destroy()
! CHECK: %{{.*}} = fir.call @_FortranACUFStreamDestroy(%{{.*}}) fastmath<contract> : (i64) -> i32
+
+! A default-kind (i32) actual argument must be widened to i64 before
+! reaching the runtime stream call.
+subroutine default_stream_i32_literal
+ use cuda_runtime_api
+ integer :: istat
+ istat = cudaforSetDefaultStream(0)
+ istat = cudaStreamDestroy(0)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdefault_stream_i32_literal()
+! CHECK: %[[ZERO1:.*]] = arith.constant 0 : i32
+! CHECK: %[[STRM1:.*]] = fir.convert %[[ZERO1]] : (i32) -> i64
+! CHECK: %{{.*}} = fir.call @_FortranACUFSetDefaultStream(%[[STRM1]]) fastmath<contract> : (i64) -> i32
+! CHECK: %[[ZERO2:.*]] = arith.constant 0 : i32
+! CHECK: %[[STRM2:.*]] = fir.convert %[[ZERO2]] : (i32) -> i64
+! CHECK: %{{.*}} = fir.call @_FortranACUFStreamDestroy(%[[STRM2]]) fastmath<contract> : (i64) -> i32
+
+subroutine default_stream_i32_var
+ use cuda_runtime_api
+ integer :: istat
+ integer(4) :: s
+ s = 0
+ istat = cudaforSetDefaultStream(s)
+ istat = cudaStreamDestroy(s)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPdefault_stream_i32_var()
+! CHECK: %[[L1:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[V1:.*]] = fir.convert %[[L1]] : (i32) -> i64
+! CHECK: %{{.*}} = fir.call @_FortranACUFSetDefaultStream(%[[V1]]) fastmath<contract> : (i64) -> i32
+! CHECK: %[[L2:.*]] = fir.load %{{.*}} : !fir.ref<i32>
+! CHECK: %[[V2:.*]] = fir.convert %[[L2]] : (i32) -> i64
+! CHECK: %{{.*}} = fir.call @_FortranACUFStreamDestroy(%[[V2]]) fastmath<contract> : (i64) -> i32
More information about the flang-commits mailing list