[flang-commits] [flang] [flang][cuda] Add fence after barrier_init (PR #163016)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Sat Oct 11 12:20:44 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/163016
Add a fence after the barrier init instruction as it is done in the reference compiler.
>From c5c7c99389bf1498fd9431bd31722fcacd7b1237 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Sat, 11 Oct 2025 12:19:48 -0700
Subject: [PATCH] [flang][cuda] Add fence after barrier_init
---
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 5 +++++
flang/test/Lower/CUDA/cuda-device-proc.cuf | 1 +
2 files changed, 6 insertions(+)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index c9cf6c23a81a5..fa78c62deef15 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3236,6 +3236,11 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
convertBarrierToLLVM(builder, loc, fir::getBase(args[0]));
mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier,
fir::getBase(args[1]), {});
+ auto kind = mlir::NVVM::ProxyKindAttr::get(
+ builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
+ auto space = mlir::NVVM::SharedSpaceAttr::get(
+ builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
+ mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
}
// BESSEL_JN
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 1bf714010f5d3..5744d5dd8ea21 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -411,6 +411,7 @@ end subroutine
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
+! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
More information about the flang-commits
mailing list