[flang-commits] [flang] [flang][cuda] Add interface and lower barrier_init (PR #162929)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Fri Oct 10 14:52:32 PDT 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/162929
Add an interface for `barrier_init` and lower it to the NVVM `mbarrier.init.shared` op.
https://github.com/clementval/llvm-project/pull/new/cuf_barrier_init
>From bc1ee80eb8abe784f42e59497202bd43ca154bb6 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 10 Oct 2025 14:50:27 -0700
Subject: [PATCH] [flang][cuda] Add interface and lower barrier_init
---
.../flang/Optimizer/Builder/IntrinsicCall.h | 1 +
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 20 +++++++++++++++++++
flang/module/cudadevice.f90 | 7 +++++++
flang/test/Lower/CUDA/cuda-device-proc.cuf | 15 ++++++++++++++
4 files changed, 43 insertions(+)
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 320f913858956..695221cbcb42c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -208,6 +208,7 @@ struct IntrinsicLibrary {
fir::ExtendedValue genAssociated(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtand(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genBesselJn(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genBesselYn(mlir::Type,
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index de7694ffd468c..2c21868295528 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -346,6 +346,10 @@ static constexpr IntrinsicHandler handlers[]{
&I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>,
{{{"mask", asValue}, {"pred", asValue}}},
/*isElemental=*/false},
+ {"barrier_init",
+ &I::genBarrierInit,
+ {{{"barrier", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
{"bessel_jn",
&I::genBesselJn,
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
@@ -3176,6 +3180,22 @@ IntrinsicLibrary::genAssociated(mlir::Type resultType,
return fir::runtime::genAssociated(builder, loc, pointerBox, targetBox);
}
+// BARRIER_INIT (CUDA): emit nvvm.mbarrier.init.shared for (barrier address, count).
+void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 2); // args[0] = barrier, args[1] = count
+ auto llvmPtr = fir::ConvertOp::create( // barrier base address as an opaque !llvm.ptr
+ builder, loc, mlir::LLVM::LLVMPointerType::get(builder.getContext()),
+ fir::getBase(args[0]));
+ auto addrCast = mlir::LLVM::AddrSpaceCastOp::create( // cast that pointer into the NVVM shared address space
+ builder, loc,
+ mlir::LLVM::LLVMPointerType::get(
+ builder.getContext(),
+ static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared)),
+ llvmPtr);
+ mlir::NVVM::MBarrierInitSharedOp::create(builder, loc, addrCast, // init op takes the shared-space pointer
+ fir::getBase(args[1]), {}); // count operand; NOTE(review): trailing {} looks like an unset optional operand - confirm
+}
+
// BESSEL_JN
fir::ExtendedValue
IntrinsicLibrary::genBesselJn(mlir::Type resultType,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 1598c64db2cb5..4f552dcf08372 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -1987,6 +1987,13 @@ attributes(device,host) logical function on_device() bind(c)
end function
end interface
+ interface
+ attributes(device) subroutine barrier_init(barrier, count) ! device procedure; calls are lowered by the "barrier_init" intrinsic handler
+ integer(8) :: barrier ! 64-bit integer holding the barrier
+ integer(4) :: count ! 32-bit count; presumably the number of participating threads - confirm
+ end subroutine
+ end interface
+
contains
attributes(device) subroutine syncthreads()
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 5e1f6b66d1d53..1f825436cf416 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -417,3 +417,18 @@ end subroutine
! CHECK-DAG: func.func private @__ldcs_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>)
! CHECK-DAG: func.func private @__ldlu_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>)
! CHECK-DAG: func.func private @__ldcv_r8x2_(!fir.ref<!fir.array<2xf64>>, !fir.ref<!fir.array<2xf64>>)
+
+attributes(global) subroutine test_barrier() ! kernel exercising the barrier_init lowering
+ integer(8), shared :: barrier ! barrier variable declared with the shared attribute
+ call barrier_init(barrier, 256) ! the literal 256 is expected as an i32 constant in the generated IR
+end subroutine
+
+
+! CHECK-LABEL: func.func @_QPtest_barrier()
+
+! CHECK: %[[SHARED:.*]] = cuf.shared_memory i64 {bindc_name = "barrier", uniq_name = "_QFtest_barrierEbarrier"} -> !fir.ref<i64>
+! CHECK: %[[DECL_SHARED:.*]]:2 = hlfir.declare %[[SHARED]] {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_barrierEbarrier"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[COUNT:.*]] = arith.constant 256 : i32
+! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
+! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32
More information about the flang-commits
mailing list