[Mlir-commits] [mlir] [MLIR][NVVM] Update mbarrier.arrive.* Op (PR #168758)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Wed Nov 19 11:05:01 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Durgadoss R (durga4github)
<details>
<summary>Changes</summary>
This patch updates the mbarrier.arrive.* family of Ops to include
all features added up to Blackwell.
* Update the `mbarrier.arrive` Op to include shared_cluster
memory space, cta/cluster scope and an option to lower using
relaxed semantics.
* An `arrive_drop` variant is added for both the `arrive` and
`arrive.nocomplete` operations.
* Verifier checks are added wherever appropriate.
* lit tests are added to verify the lowering to the intrinsics.
TODO:
* Updates for the remaining mbarrier family will be done in
subsequent PRs (mainly expect/complete-tx, arrive.expect-tx,
and {test/try}_wait).
---
Patch is 33.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168758.diff
6 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+107-12)
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (+131-5)
- (added) mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir (+103)
- (added) mlir/test/Target/LLVMIR/nvvm/mbar_arrive_drop.mlir (+103)
- (renamed) mlir/test/Target/LLVMIR/nvvm/mbar_init.mlir (-40)
- (added) mlir/test/Target/LLVMIR/nvvm/mbar_invalid.mlir (+33)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 6e3a92b5bde42..b4fdd0aed9f56 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -716,9 +716,7 @@ def NVVM_MBarrierInvalOp : NVVM_Op<"mbarrier.inval">,
}];
}
-def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
- Results<(outs I64:$res)>,
- Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr)> {
+def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive"> {
let summary = "MBarrier Arrive Operation";
let description = [{
The `nvvm.mbarrier.arrive` operation performs an arrive-on operation on the
@@ -730,19 +728,40 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
with this release pattern.
This operation causes the executing thread to signal its arrival at the barrier.
- The operation returns an opaque value that captures the phase of the
- *mbarrier object* prior to the arrive-on operation. The contents of this state
- value are implementation-specific.
- The operation takes the following operand:
+ - `res`: When the `space` is not shared_cluster, this operation returns an
+ opaque 64-bit value capturing the phase of the *mbarrier object* prior to
+ the arrive-on operation. The contents of this return value are
+ implementation-specific. An *mbarrier object* located in the shared_cluster
+ space cannot return a value.
+
+ The operation takes the following operands:
- `addr`: A pointer to the memory location of the *mbarrier object*. The `addr`
- must be a pointer to generic or shared::cta memory. When it is generic, the
- underlying address must be within the shared::cta memory space; otherwise
- the behavior is undefined.
+ must be a pointer to generic or shared_cta or shared_cluster memory. When it
+ is generic, the underlying address must be within the shared_cta memory space;
+ otherwise the behavior is undefined.
+ - `count`: This specifies the amount by which the pending arrival count is
+ decremented. If the `count` argument is not specified, the pending arrival
+ count is decremented by 1.
+ - `scope`: This specifies the set of threads that directly observe the memory
+ synchronizing effect of the `mbarrier.arrive` operation.
+ - `space`: This indicates the memory space where the mbarrier object resides.
+ - `relaxed`: When set to true, the `arrive` operation has relaxed memory semantics
+ and does not provide any ordering or visibility guarantees.
[For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
}];
- let assemblyFormat = "$addr attr-dict `:` type($addr) `->` type($res)";
+
+ let results = (outs Optional<I64>:$res);
+ let arguments = (ins
+ AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared, LLVM_PointerSharedCluster]>:$addr,
+ Optional<I32>:$count,
+ DefaultValuedAttr<MemScopeKindAttr, "MemScopeKind::CTA">:$scope,
+ DefaultValuedAttr<BoolAttr, "false">:$relaxed);
+
+ let assemblyFormat = "$addr (`,` $count^)? attr-dict `:` type($addr) (`->` type($res)^)?";
+
+ let hasVerifier = 1;
let extraClassDeclaration = [{
static mlir::NVVM::IDArgPair
@@ -753,7 +772,54 @@ def NVVM_MBarrierArriveOp : NVVM_Op<"mbarrier.arrive">,
string llvmBuilder = [{
auto [id, args] = NVVM::MBarrierArriveOp::getIntrinsicIDAndArgs(
*op, moduleTranslation, builder);
- $res = createIntrinsicCall(builder, id, args);
+
+ int addrSpace = llvm::cast<LLVMPointerType>(op.getAddr().getType()).getAddressSpace();
+ if (addrSpace != static_cast<unsigned>(NVVM::NVVMMemorySpace::SharedCluster))
+ $res = createIntrinsicCall(builder, id, args);
+ else
+ createIntrinsicCall(builder, id, args);
+ }];
+}
+
+def NVVM_MBarrierArriveDropOp : NVVM_Op<"mbarrier.arrive_drop"> {
+ let summary = "MBarrier Arrive-Drop Operation";
+ let description = [{
+ The `nvvm.mbarrier.arrive_drop` operation decrements the expected arrival
+ count of the *mbarrier object* by `count` and then performs an arrive-on
+ operation. When `count` is not specified, it defaults to 1. The decrement
+ of the expected arrival count applies to all the subsequent phases of the
+ *mbarrier object*. The remaining semantics are identical to those of the
+ `nvvm.mbarrier.arrive` operation.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)
+ }];
+
+ let results = (outs Optional<I64>:$res);
+ let arguments = (ins
+ AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared, LLVM_PointerSharedCluster]>:$addr,
+ Optional<I32>:$count,
+ DefaultValuedAttr<MemScopeKindAttr, "MemScopeKind::CTA">:$scope,
+ DefaultValuedAttr<BoolAttr, "false">:$relaxed);
+
+ let assemblyFormat = "$addr (`,` $count^)? attr-dict `:` type($addr) (`->` type($res)^)?";
+
+ let hasVerifier = 1;
+
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
+ }];
+
+ string llvmBuilder = [{
+ auto [id, args] = NVVM::MBarrierArriveDropOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+
+ int addrSpace = llvm::cast<LLVMPointerType>(op.getAddr().getType()).getAddressSpace();
+ if (addrSpace != static_cast<unsigned>(NVVM::NVVMMemorySpace::SharedCluster))
+ $res = createIntrinsicCall(builder, id, args);
+ else
+ createIntrinsicCall(builder, id, args);
}];
}
@@ -803,6 +869,35 @@ def NVVM_MBarrierArriveNocompleteOp : NVVM_Op<"mbarrier.arrive.nocomplete">,
}];
}
+def NVVM_MBarrierArriveDropNocompleteOp : NVVM_Op<"mbarrier.arrive_drop.nocomplete">,
+ Results<(outs I64:$res)>,
+ Arguments<(ins AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
+ I32:$count)> {
+ let summary = "MBarrier Arrive-Drop No-Complete Operation";
+ let description = [{
+ The `nvvm.mbarrier.arrive_drop.nocomplete` operation decrements the expected
+ arrival count of the *mbarrier object* by the amount `count` and then performs
+ an arrive-on operation on the *mbarrier object* with the guarantee that it
+ will not cause the barrier to complete its current phase.
+
+ [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)
+ }];
+
+ let assemblyFormat = "$addr `,` $count attr-dict `:` type(operands) `->` type($res)";
+
+ let extraClassDeclaration = [{
+ static mlir::NVVM::IDArgPair
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::IRBuilderBase& builder);
+ }];
+
+ string llvmBuilder = [{
+ auto [id, args] = NVVM::MBarrierArriveDropNocompleteOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, builder);
+ $res = createIntrinsicCall(builder, id, args);
+ }];
+}
+
def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">,
Arguments<(ins
AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 7ac427dbe3941..568c883cfc962 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -56,10 +56,26 @@ static bool isPtrInAddrSpace(mlir::Value ptr, NVVMMemorySpace targetAS) {
return ptrTy.getAddressSpace() == static_cast<unsigned>(targetAS);
}
+static bool isPtrInGenericSpace(mlir::Value ptr) {
+ return isPtrInAddrSpace(ptr, NVVMMemorySpace::Generic);
+}
+
static bool isPtrInSharedCTASpace(mlir::Value ptr) {
return isPtrInAddrSpace(ptr, NVVMMemorySpace::Shared);
}
+static bool isPtrInSharedClusterSpace(mlir::Value ptr) {
+ return isPtrInAddrSpace(ptr, NVVMMemorySpace::SharedCluster);
+}
+
+static llvm::Value *castPtrToAddrSpace(llvm::IRBuilderBase &builder,
+ llvm::Value *ptr,
+ NVVMMemorySpace targetAS) {
+ unsigned AS = static_cast<unsigned>(targetAS);
+ return builder.CreateAddrSpaceCast(
+ ptr, llvm::PointerType::get(builder.getContext(), AS));
+}
+
//===----------------------------------------------------------------------===//
// Verifier methods
//===----------------------------------------------------------------------===//
@@ -220,6 +236,32 @@ LogicalResult CpAsyncBulkGlobalToSharedClusterOp::verify() {
return success();
}
+static LogicalResult verifyMBarrierArriveLikeOp(Operation *op, Value addr,
+ NVVM::MemScopeKind scope,
+ Value retVal) {
+ bool isSharedCluster = isPtrInSharedClusterSpace(addr);
+ bool hasRetValue = static_cast<bool>(retVal);
+
+ if (scope != NVVM::MemScopeKind::CTA && scope != NVVM::MemScopeKind::CLUSTER)
+ return op->emitError("mbarrier scope must be either CTA or Cluster");
+
+ if (isSharedCluster && hasRetValue)
+ return op->emitError(
+ "mbarrier in shared_cluster space cannot return any value");
+
+ return success();
+}
+
+LogicalResult MBarrierArriveOp::verify() {
+ return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope(),
+ getRes());
+}
+
+LogicalResult MBarrierArriveDropOp::verify() {
+ return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope(),
+ getRes());
+}
+
LogicalResult ConvertFloatToTF32Op::verify() {
using RndMode = NVVM::FPRoundingMode;
switch (getRnd()) {
@@ -1864,12 +1906,81 @@ mlir::NVVM::IDArgPair MBarrierInvalOp::getIntrinsicIDAndArgs(
mlir::NVVM::IDArgPair MBarrierArriveOp::getIntrinsicIDAndArgs(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::MBarrierArriveOp>(op);
- bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
- llvm::Intrinsic::ID id = isShared
- ? llvm::Intrinsic::nvvm_mbarrier_arrive_shared
- : llvm::Intrinsic::nvvm_mbarrier_arrive;
- return {id, {mt.lookupValue(thisOp.getAddr())}};
+ bool isClusterSpace = isPtrInSharedClusterSpace(thisOp.getAddr());
+ bool isClusterScope = thisOp.getScope() == NVVM::MemScopeKind::CLUSTER;
+ // bit-0: Space
+ // bit-1: Scope
+ size_t index = ((isClusterScope ? 1 : 0) << 1) | (isClusterSpace ? 1 : 0);
+
+ static constexpr llvm::Intrinsic::ID IDs[] = {
+ llvm::Intrinsic::nvvm_mbarrier_arrive_scope_cta_space_cta,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_scope_cta_space_cluster,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_scope_cluster_space_cta,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_scope_cluster_space_cluster};
+ static constexpr llvm::Intrinsic::ID relaxedIDs[] = {
+ llvm::Intrinsic::nvvm_mbarrier_arrive_relaxed_scope_cta_space_cta,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_relaxed_scope_cta_space_cluster,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_relaxed_scope_cluster_space_cta,
+ llvm::Intrinsic::
+ nvvm_mbarrier_arrive_relaxed_scope_cluster_space_cluster};
+ auto id = thisOp.getRelaxed() ? relaxedIDs[index] : IDs[index];
+
+ // Tidy-up the Intrinsic Args
+ bool needCast = isPtrInGenericSpace(thisOp.getAddr());
+ llvm::Value *mbar = mt.lookupValue(thisOp.getAddr());
+ if (needCast)
+ mbar = castPtrToAddrSpace(builder, mbar, NVVMMemorySpace::Shared);
+
+ // When count is not explicitly specified, the default is 1.
+ llvm::LLVMContext &ctx = mt.getLLVMContext();
+ bool hasCount = static_cast<bool>(thisOp.getCount());
+ llvm::Value *count =
+ hasCount ? mt.lookupValue(thisOp.getCount())
+ : llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), 1);
+
+ return {id, {mbar, count}};
+}
+
+mlir::NVVM::IDArgPair MBarrierArriveDropOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierArriveDropOp>(op);
+
+ bool isClusterSpace = isPtrInSharedClusterSpace(thisOp.getAddr());
+ bool isClusterScope = thisOp.getScope() == NVVM::MemScopeKind::CLUSTER;
+ // bit-0: Space
+ // bit-1: Scope
+ size_t index = ((isClusterScope ? 1 : 0) << 1) | (isClusterSpace ? 1 : 0);
+
+ static constexpr llvm::Intrinsic::ID IDs[] = {
+ llvm::Intrinsic::nvvm_mbarrier_arrive_drop_scope_cta_space_cta,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_drop_scope_cta_space_cluster,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_drop_scope_cluster_space_cta,
+ llvm::Intrinsic::nvvm_mbarrier_arrive_drop_scope_cluster_space_cluster};
+ static constexpr llvm::Intrinsic::ID relaxedIDs[] = {
+ llvm::Intrinsic::nvvm_mbarrier_arrive_drop_relaxed_scope_cta_space_cta,
+ llvm::Intrinsic::
+ nvvm_mbarrier_arrive_drop_relaxed_scope_cta_space_cluster,
+ llvm::Intrinsic::
+ nvvm_mbarrier_arrive_drop_relaxed_scope_cluster_space_cta,
+ llvm::Intrinsic::
+ nvvm_mbarrier_arrive_drop_relaxed_scope_cluster_space_cluster};
+ auto id = thisOp.getRelaxed() ? relaxedIDs[index] : IDs[index];
+
+ // Tidy-up the Intrinsic Args
+ bool needCast = isPtrInGenericSpace(thisOp.getAddr());
+ llvm::Value *mbar = mt.lookupValue(thisOp.getAddr());
+ if (needCast)
+ mbar = castPtrToAddrSpace(builder, mbar, NVVMMemorySpace::Shared);
+
+ // When count is not explicitly specified, the default is 1.
+ llvm::LLVMContext &ctx = mt.getLLVMContext();
+ bool hasCount = static_cast<bool>(thisOp.getCount());
+ llvm::Value *count =
+ hasCount ? mt.lookupValue(thisOp.getCount())
+ : llvm::ConstantInt::get(llvm::Type::getInt32Ty(ctx), 1);
+
+ return {id, {mbar, count}};
}
mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
@@ -1887,6 +1998,21 @@ mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
return {id, std::move(args)};
}
+mlir::NVVM::IDArgPair MBarrierArriveDropNocompleteOp::getIntrinsicIDAndArgs(
+ Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+ auto thisOp = cast<NVVM::MBarrierArriveDropNocompleteOp>(op);
+ bool isShared = isPtrInSharedCTASpace(thisOp.getAddr());
+ llvm::Intrinsic::ID id =
+ isShared ? llvm::Intrinsic::nvvm_mbarrier_arrive_drop_noComplete_shared
+ : llvm::Intrinsic::nvvm_mbarrier_arrive_drop_noComplete;
+ // Fill the Intrinsic Args
+ llvm::SmallVector<llvm::Value *> args;
+ args.push_back(mt.lookupValue(thisOp.getAddr()));
+ args.push_back(mt.lookupValue(thisOp.getCount()));
+
+ return {id, std::move(args)};
+}
+
mlir::NVVM::IDArgPair MBarrierTestWaitOp::getIntrinsicIDAndArgs(
Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
auto thisOp = cast<NVVM::MBarrierTestWaitOp>(op);
diff --git a/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir b/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir
new file mode 100644
index 0000000000000..6e7e1636c1de5
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/mbar_arrive.mlir
@@ -0,0 +1,103 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+llvm.func @mbarrier_arrive_generic(%barrier: !llvm.ptr, %count : i32) {
+ // CHECK-LABEL: define void @mbarrier_arrive_generic(ptr %0, i32 %1) {
+ // CHECK-NEXT: %3 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %4 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %3, i32 1)
+ // CHECK-NEXT: %5 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %6 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %5, i32 %1)
+ // CHECK-NEXT: %7 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %8 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %7, i32 %1)
+ // CHECK-NEXT: %9 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %10 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3) %9, i32 %1)
+ // CHECK-NEXT: %11 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %12 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %11, i32 1)
+ // CHECK-NEXT: %13 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %14 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %13, i32 %1)
+ // CHECK-NEXT: %15 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %16 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %15, i32 %1)
+ // CHECK-NEXT: %17 = addrspacecast ptr %0 to ptr addrspace(3)
+ // CHECK-NEXT: %18 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3) %17, i32 %1)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr -> i64
+ %1 = nvvm.mbarrier.arrive %barrier, %count : !llvm.ptr -> i64
+ %2 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cta>} : !llvm.ptr -> i64
+ %3 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr -> i64
+
+ %4 = nvvm.mbarrier.arrive %barrier {relaxed = true} : !llvm.ptr -> i64
+ %5 = nvvm.mbarrier.arrive %barrier, %count {relaxed = true} : !llvm.ptr -> i64
+ %6 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr -> i64
+ %7 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr -> i64
+ llvm.return
+}
+
+llvm.func @mbarrier_arrive_shared(%barrier: !llvm.ptr<3>, %count : i32) {
+ // CHECK-LABEL: define void @mbarrier_arrive_shared(ptr addrspace(3) %0, i32 %1) {
+ // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %0, i32 1)
+ // CHECK-NEXT: %4 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
+ // CHECK-NEXT: %5 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
+ // CHECK-NEXT: %6 = call i64 @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
+ // CHECK-NEXT: %7 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 1)
+ // CHECK-NEXT: %8 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
+ // CHECK-NEXT: %9 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
+ // CHECK-NEXT: %10 = call i64 @llvm.nvvm.mbarrier.arrive.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
+ // CHECK-NEXT: ret void
+ // CHECK-NEXT: }
+ %0 = nvvm.mbarrier.arrive %barrier : !llvm.ptr<3> -> i64
+ %1 = nvvm.mbarrier.arrive %barrier, %count : !llvm.ptr<3> -> i64
+ %2 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<3> -> i64
+ %3 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3> -> i64
+
+ %4 = nvvm.mbarrier.arrive %barrier {relaxed = true} : !llvm.ptr<3> -> i64
+ %5 = nvvm.mbarrier.arrive %barrier, %count {relaxed = true} : !llvm.ptr<3> -> i64
+ %6 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr<3> -> i64
+ %7 = nvvm.mbarrier.arrive %barrier, %count {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr<3> -> i64
+ llvm.return
+}
+
+llvm.func @mbarrier_arrive_shared_cluster(%barrier: !llvm.ptr<7>, %count : i32) {
+ // CHECK-LABEL: define void @mbarrier_arrive_shared_cluster(ptr addrspace(7) %0, i32 %1) {
+ // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7) %0, i32 1)
+ // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
+ // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
+ // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.scope.cluster.space.cluster(ptr addrspace(7) %0, i32 %1)
+ // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.relaxed.scope.cta.space.cluster(ptr addrspace(...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/168758
More information about the Mlir-commits
mailing list