[flang-commits] [flang] [flang][cuda] Implement this_cluster for cooperative groups (PR #169414)
via flang-commits
flang-commits at lists.llvm.org
Mon Nov 24 13:57:55 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-flang-fir-hlfir
Author: Valentin Clement (バレンタイン クレメン) (clementval)
<details>
<summary>Changes</summary>
Implement `this_cluster` like `this_group` by lowering it directly like an intrinsic function. Use the NVVM operation to get the rank and size information and populate the derived type.
---
Full diff: https://github.com/llvm/llvm-project/pull/169414.diff
3 Files Affected:
- (modified) flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h (+1)
- (modified) flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp (+42)
- (modified) flang/module/cooperative_groups.f90 (+13)
``````````diff
diff --git a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
index ae7d566920656..027bd3b79a1df 100644
--- a/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/CUDAIntrinsicCall.h
@@ -60,6 +60,7 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
mlir::Value genSyncThreadsCount(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genSyncThreadsOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genSyncWarp(llvm::ArrayRef<fir::ExtendedValue>);
+ mlir::Value genThisCluster(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisGrid(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisThreadBlock(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genThisWarp(mlir::Type, llvm::ArrayRef<mlir::Value>);
diff --git a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
index f67129dfa6730..c560c53033780 100644
--- a/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/CUDAIntrinsicCall.cpp
@@ -457,6 +457,10 @@ static constexpr IntrinsicHandler cudaHandlers[]{
static_cast<CUDAIntrinsicLibrary::SubroutineGenerator>(&CI::genSyncWarp),
{},
/*isElemental=*/false},
+ {"this_cluster",
+ static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisCluster),
+ {},
+ /*isElemental=*/false},
{"this_grid",
static_cast<CUDAIntrinsicLibrary::ElementalGenerator>(&CI::genThisGrid),
{},
@@ -1122,6 +1126,44 @@ void CUDAIntrinsicLibrary::genSyncWarp(
mlir::NVVM::SyncWarpOp::create(builder, loc, fir::getBase(args[0]));
}
+// THIS_CLUSTER
+mlir::Value
+CUDAIntrinsicLibrary::genThisCluster(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 0);
+ auto recTy = mlir::cast<fir::RecordType>(resultType);
+ assert(recTy && "RecordType expepected");
+ mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
+ mlir::Type i32Ty = builder.getI32Type();
+
+ // SIZE
+ mlir::Value size = mlir::NVVM::ClusterDim::create(builder, loc, i32Ty);
+ auto sizeFieldName = recTy.getTypeList()[1].first;
+ mlir::Type sizeFieldTy = recTy.getTypeList()[1].second;
+ mlir::Type fieldIndexType = fir::FieldType::get(resultType.getContext());
+ mlir::Value sizeFieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fieldIndexType, sizeFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ mlir::Value sizeCoord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(sizeFieldTy), res, sizeFieldIndex);
+ fir::StoreOp::create(builder, loc, size, sizeCoord);
+
+ // RANK
+ mlir::Value rank = mlir::NVVM::ClusterId::create(builder, loc, i32Ty);
+ mlir::Value one = builder.createIntegerConstant(loc, i32Ty, 1);
+ rank = mlir::arith::AddIOp::create(builder, loc, rank, one);
+ auto rankFieldName = recTy.getTypeList()[2].first;
+ mlir::Type rankFieldTy = recTy.getTypeList()[2].second;
+ mlir::Value rankFieldIndex = fir::FieldIndexOp::create(
+ builder, loc, fieldIndexType, rankFieldName, recTy,
+ /*typeParams=*/mlir::ValueRange{});
+ mlir::Value rankCoord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(rankFieldTy), res, rankFieldIndex);
+ fir::StoreOp::create(builder, loc, rank, rankCoord);
+
+ return res;
+}
+
// THIS_GRID
mlir::Value
CUDAIntrinsicLibrary::genThisGrid(mlir::Type resultType,
diff --git a/flang/module/cooperative_groups.f90 b/flang/module/cooperative_groups.f90
index b8875f72f8079..1c89866f9c84a 100644
--- a/flang/module/cooperative_groups.f90
+++ b/flang/module/cooperative_groups.f90
@@ -14,6 +14,12 @@ module cooperative_groups
implicit none
+type :: cluster_group
+ type(c_devptr), private :: handle
+ integer(4) :: size
+ integer(4) :: rank
+end type cluster_group
+
type :: grid_group
type(c_devptr), private :: handle
integer(4) :: size
@@ -32,6 +38,13 @@ module cooperative_groups
integer(4) :: rank
end type thread_group
+interface
+ attributes(device) function this_cluster()
+ import
+ type(cluster_group) :: this_cluster
+ end function
+end interface
+
interface
attributes(device) function this_grid()
import
``````````
</details>
https://github.com/llvm/llvm-project/pull/169414
More information about the flang-commits
mailing list