[flang-commits] [flang] 548e013 - [flang][cuda] Add fence after barrier_init (#163016)

via flang-commits flang-commits at lists.llvm.org
Sat Oct 11 12:38:43 PDT 2025


Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-10-11T19:38:39Z
New Revision: 548e0137bb9dd473927e5d0e8a8140f66aee747c

URL: https://github.com/llvm/llvm-project/commit/548e0137bb9dd473927e5d0e8a8140f66aee747c
DIFF: https://github.com/llvm/llvm-project/commit/548e0137bb9dd473927e5d0e8a8140f66aee747c.diff

LOG: [flang][cuda] Add fence after barrier_init (#163016)

Add a fence after the barrier init instruction as it is done in the
reference compiler.

Added: 
    

Modified: 
    flang/lib/Optimizer/Builder/IntrinsicCall.cpp
    flang/test/Lower/CUDA/cuda-device-proc.cuf

Removed: 
    


################################################################################
diff  --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index e5b70eed9926b..b3a2d49c357db 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3244,6 +3244,11 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
       convertBarrierToLLVM(builder, loc, fir::getBase(args[0]));
   mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, barrier,
                                            fir::getBase(args[1]), {});
+  auto kind = mlir::NVVM::ProxyKindAttr::get(
+      builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
+  auto space = mlir::NVVM::SharedSpaceAttr::get(
+      builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
+  mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
 }
 
 // BESSEL_JN

diff  --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 487a6b2883f46..eef4225a62a77 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -411,6 +411,7 @@ end subroutine
 ! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
 ! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
 ! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
+! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}
 
 ! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
 ! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>


        


More information about the flang-commits mailing list