[flang-commits] [flang] [flang][cuda] Support gpu.launch_func with async token in target rewrite pass (PR #165485)
via flang-commits
flang-commits at lists.llvm.org
Tue Oct 28 15:41:09 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-flang-codegen
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/165485.diff
2 Files Affected:
- (modified) flang/lib/Optimizer/CodeGen/TargetRewrite.cpp (+7-2)
- (modified) flang/test/Fir/CUDA/cuda-target-rewrite.mlir (+20)
``````````diff
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index 0776346870c72..0d0dbb01a799d 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -143,7 +143,8 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
llvm::SmallVector<mlir::Type> operandsTypes;
for (auto arg : gpuLaunchFunc.getKernelOperands())
operandsTypes.push_back(arg.getType());
- auto fctTy = mlir::FunctionType::get(&context, operandsTypes, {});
+ auto fctTy = mlir::FunctionType::get(&context, operandsTypes,
+ gpuLaunchFunc.getResultTypes());
if (!hasPortableSignature(fctTy, op))
convertCallOp(gpuLaunchFunc, fctTy);
} else if (auto addr = mlir::dyn_cast<fir::AddrOfOp>(op)) {
@@ -520,10 +521,14 @@ class TargetRewrite : public fir::impl::TargetRewritePassBase<TargetRewrite> {
llvm::SmallVector<mlir::Value, 1> newCallResults;
// TODO propagate/update call argument and result attributes.
if constexpr (std::is_same_v<std::decay_t<A>, mlir::gpu::LaunchFuncOp>) {
+ mlir::Value asyncToken = callOp.getAsyncToken();
auto newCall = A::create(*rewriter, loc, callOp.getKernel(),
callOp.getGridSizeOperandValues(),
callOp.getBlockSizeOperandValues(),
- callOp.getDynamicSharedMemorySize(), newOpers);
+ callOp.getDynamicSharedMemorySize(), newOpers,
+ asyncToken ? asyncToken.getType() : nullptr,
+ callOp.getAsyncDependencies(),
+                                 /*clusterSize=*/std::nullopt);
if (callOp.getClusterSizeX())
newCall.getClusterSizeXMutable().assign(callOp.getClusterSizeX());
if (callOp.getClusterSizeY())
diff --git a/flang/test/Fir/CUDA/cuda-target-rewrite.mlir b/flang/test/Fir/CUDA/cuda-target-rewrite.mlir
index 48fee10f3db97..5562e00085526 100644
--- a/flang/test/Fir/CUDA/cuda-target-rewrite.mlir
+++ b/flang/test/Fir/CUDA/cuda-target-rewrite.mlir
@@ -108,3 +108,23 @@ module attributes {gpu.container_module, fir.defaultkind = "a1c4d8i4l4r4", fir.k
}
}
+// -----
+
+module attributes {gpu.container_module, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+ gpu.module @testmod {
+ gpu.func @_QPtest(%arg0: complex<f32>) -> () kernel {
+ gpu.return
+ }
+ }
+ func.func @main(%arg0: complex<f32>) {
+ %0 = llvm.mlir.constant(0 : i64) : i64
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = fir.alloca i64
+ %3 = cuf.stream_cast %2 : !fir.ref<i64>
+ %4 = gpu.launch_func async [%3] @testmod::@_QPtest blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %1 args(%arg0 : complex<f32>) {cuf.proc_attr = #cuf.cuda_proc<global>}
+ return
+ }
+}
+
+// CHECK-LABEL: func.func @main
+// CHECK: %{{.*}} = gpu.launch_func async [%{{.*}}] @testmod::@_QPtest blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) : i64 dynamic_shared_memory_size %{{.*}} args(%{{.*}} : !fir.vector<2:f32>) {cuf.proc_attr = #cuf.cuda_proc<global>}
``````````
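
For readers skimming the diff, here is an illustrative before/after of the rewrite this change enables. The IR below is a hand-written sketch modeled on the new test, not output taken from the PR; the value names (`%stream`, `%c1`, `%z`, `%v`) and the omission of `dynamic_shared_memory_size` are assumptions for brevity.

```mlir
// Before the TargetRewrite pass: the launch consumes a stream dependency
// and produces an !gpu.async.token; its kernel argument is a complex<f32>.
// (%stream is an assumed !gpu.async.token, %c1 an assumed i64 constant.)
%token = gpu.launch_func async [%stream] @testmod::@_QPtest
    blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) : i64
    args(%z : complex<f32>)

// After the pass on x86_64: the complex argument is passed per the target
// ABI (here as !fir.vector<2:f32>), while the async token result and the
// async dependency list are carried over to the rebuilt gpu.launch_func.
%token2 = gpu.launch_func async [%stream] @testmod::@_QPtest
    blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1) : i64
    args(%v : !fir.vector<2:f32>)
```

The point of the patch is the second half of this sketch: previously the synthesized function type used an empty result list, so a launch carrying an async token was not rebuilt with its token type and dependencies.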
</details>
https://github.com/llvm/llvm-project/pull/165485