[flang-commits] [flang] [flang][cuda] Add interface and lowering for fence_proxy_async (PR #163014)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Sat Oct 11 12:27:11 PDT 2025
Valentin Clement =?utf-8?b?KOODkOODrOODsw=?Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/163014 at github.com>
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/163014
>From d400fe59a5b9beddea3bc4b2d4568ccd7cbcfa66 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Sat, 11 Oct 2025 12:09:27 -0700
Subject: [PATCH 1/2] [flang][cuda] Add interface and lowering for
fence_proxy_async
---
.../flang/Optimizer/Builder/IntrinsicCall.h | 1 +
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 15 +++++++++++++++
flang/module/cudadevice.f90 | 5 +++++
flang/test/Lower/CUDA/cuda-device-proc.cuf | 7 +++++++
4 files changed, 28 insertions(+)
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index ca02693c53aeb..d0a96a512c2e7 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -274,6 +274,7 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue>);
template <Extremum, ExtremumBehavior>
mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genFraction(mlir::Type resultType,
mlir::ArrayRef<mlir::Value> args);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index c9cf6c23a81a5..4890225db452f 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -502,6 +502,10 @@ static constexpr IntrinsicHandler handlers[]{
&I::genExtendsTypeOf,
{{{"a", asBox}, {"mold", asBox}}},
/*isElemental=*/false},
+ {"fence_proxy_async",
+ &I::genFenceProxyAsync,
+ {},
+ /*isElemental=*/false},
{"findloc",
&I::genFindloc,
{{{"array", asBox},
@@ -4354,6 +4358,17 @@ IntrinsicLibrary::genExtendsTypeOf(mlir::Type resultType,
fir::getBase(args[1])));
}
+// FENCE_PROXY_ASYNC (CUDA)
+void IntrinsicLibrary::genFenceProxyAsync(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 0);
+ auto kind = mlir::NVVM::ProxyKindAttr::get(
+ builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
+ auto space = mlir::NVVM::SharedSpaceAttr::get(
+ builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
+ mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
+}
+
// FINDLOC
fir::ExtendedValue
IntrinsicLibrary::genFindloc(mlir::Type resultType,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index e6c9e958af365..548298ef854c9 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -2008,6 +2008,11 @@ attributes(device) function barrier_arrive_cnt(barrier, count) result(token)
end function
end interface
+ interface
+ attributes(device) subroutine fence_proxy_async()
+ end subroutine
+ end interface
+
contains
attributes(device) subroutine syncthreads()
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 1bf714010f5d3..378d8ddf65ad9 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -419,3 +419,10 @@ end subroutine
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
! CHECK: nvvm.mbarrier.arrive.expect_tx %[[SHARED_PTR]], %{{.*}} : !llvm.ptr<3>, i32
+
+attributes(global) subroutine test_fence()
+ call fence_proxy_async()
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_fence()
+! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
>From 070d11ed9047d968c6b77692b9f5d443744f506f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
=?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
=?UTF-8?q?=E3=83=B3=29?= <clementval at gmail.com>
Date: Sat, 11 Oct 2025 12:27:03 -0700
Subject: [PATCH 2/2] Update flang/test/Lower/CUDA/cuda-device-proc.cuf
---
flang/test/Lower/CUDA/cuda-device-proc.cuf | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 49cd026c8fbef..50c0938a09cab 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -435,4 +435,4 @@ end subroutine
! CHECK-LABEL: func.func @_QPtest_tma()
! CHECK: nvvm.cp.async.bulk.commit.group
-! CHECK: nvvm.cp.async.bulk.wait_group 0
\ No newline at end of file
+! CHECK: nvvm.cp.async.bulk.wait_group 0
More information about the flang-commits
mailing list