[Mlir-commits] [mlir] [MLIR][NVVM] Add tcgen05 wait/fence Ops (PR #126265)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Feb 7 08:56:11 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-mlir-llvm
Author: Durgadoss R (durga4github)
<details>
<summary>Changes</summary>
PR #<!-- -->126091 adds intrinsics for tcgen05
wait/fence/commit operations. This patch
adds NVVM Dialect Ops for them.
---
Full diff: https://github.com/llvm/llvm-project/pull/126265.diff
3 Files Affected:
- (modified) mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td (+109)
- (modified) mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp (+30)
- (added) mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir (+56)
``````````diff
diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
index 11226dae2c3f375..fe15a524ec3b5cb 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
@@ -2617,6 +2617,30 @@ def Tcgen05GroupKindAttr :
let assemblyFormat = "`<` $value `>`";
}
+def Tcgen05FenceBefore : I32EnumAttrCase<"BEFORE_THREAD_SYNC", 0, "before">;
+def Tcgen05FenceAfter : I32EnumAttrCase<"AFTER_THREAD_SYNC", 1, "after">;
+def Tcgen05FenceKind : I32EnumAttr<"Tcgen05FenceKind", "NVVM Tcgen05 fence kind",
+ [Tcgen05FenceBefore, Tcgen05FenceAfter]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::NVVM";
+}
+def Tcgen05FenceKindAttr :
+ EnumAttr<NVVM_Dialect, Tcgen05FenceKind, "tcgen05_fence"> {
+ let assemblyFormat = "`<` $value `>`";
+}
+
+def Tcgen05WaitLoad : I32EnumAttrCase<"LOAD", 0, "load">;
+def Tcgen05WaitStore : I32EnumAttrCase<"STORE", 1, "store">;
+def Tcgen05WaitKind : I32EnumAttr<"Tcgen05WaitKind", "NVVM Tcgen05 wait kind",
+ [Tcgen05WaitLoad, Tcgen05WaitStore]> {
+ let genSpecializedAttr = 0;
+ let cppNamespace = "::mlir::NVVM";
+}
+def Tcgen05WaitKindAttr :
+ EnumAttr<NVVM_Dialect, Tcgen05WaitKind, "tcgen05_wait"> {
+ let assemblyFormat = "`<` $value `>`";
+}
+
def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
let summary = "Tcgen05 alloc operation";
let description = [{
@@ -2701,6 +2725,91 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
}];
}
+def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
+ let summary = "Tcgen05 fence operations";
+ let description = [{
+ The `tcgen05.fence<before>` orders all prior async tcgen05 operations
+ with respect to the subsequent tcgen05 and execution ordering operations.
+ The `tcgen05.fence<after>` orders all subsequent async tcgen05 operations
+ with respect to the prior tcgen05 and execution ordering operations.
+
+ [For more information refer to the PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
+ }];
+
+ let arguments = (ins Tcgen05FenceKindAttr:$kind);
+ let assemblyFormat = "$kind attr-dict";
+
+ string llvmBuilder = [{
+ auto id = ($kind == NVVM::Tcgen05FenceKind::BEFORE_THREAD_SYNC)
+ ? llvm::Intrinsic::nvvm_tcgen05_fence_before_thread_sync
+ : llvm::Intrinsic::nvvm_tcgen05_fence_after_thread_sync;
+ createIntrinsicCall(builder, id);
+ }];
+}
+
+def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
+ let summary = "Tcgen05 wait operations";
+ let description = [{
+ The `tcgen05.wait<load>` causes the executing thread to block until
+ all prior `tcgen05.ld` operations issued by the executing thread
+ have completed. Similarly, the `tcgen05.wait<store>` causes the executing
+ thread to block until all prior `tcgen05.st` operations issued by the
+ executing thread have completed.
+ [For more information refer PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
+ }];
+
+ let arguments = (ins Tcgen05WaitKindAttr:$kind);
+ let assemblyFormat = "$kind attr-dict";
+
+ string llvmBuilder = [{
+ auto id = ($kind == NVVM::Tcgen05WaitKind::LOAD)
+ ? llvm::Intrinsic::nvvm_tcgen05_wait_ld
+ : llvm::Intrinsic::nvvm_tcgen05_wait_st;
+ createIntrinsicCall(builder, id);
+ }];
+}
+
+def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
+ let summary = "Tcgen05 commit operations";
+ let description = [{
+ The `tcgen05.commit` makes the mbarrier object, specified by
+ the operand `addr`, track the completion of all the prior
+ async-tcgen05 operations initiated by the executing thread.
+ The multicast variants allow signaling on the mbarrier objects
+ of multiple CTAs within the cluster. Operand `multicastMask`,
+ when present, specifies the destination CTAs in the cluster such
+ that each bit position in the 16-bit `multicastMask` operand
+ corresponds to the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.
+ [For more information refer PTX ISA]
+ (https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
+ }];
+
+ let arguments = (ins
+ AnyTypeOf<[LLVM_AnyPointer, LLVM_PointerShared]>:$addr,
+ Optional<I16>:$multicastMask,
+ DefaultValuedAttr<Tcgen05GroupKindAttr, "Tcgen05GroupKind::CTA_1">:$group);
+
+ let assemblyFormat = [{
+ $addr (`,` `multicast_mask` `=` $multicastMask^)?
+ attr-dict `:` type(operands)
+ }];
+
+ let extraClassDeclaration = [{
+ static llvm::Intrinsic::ID
+ getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+ llvm::SmallVector<llvm::Value *> &args);
+ }];
+
+ string llvmBuilder = [{
+ llvm::SmallVector<llvm::Value *> args;
+ auto id = NVVM::Tcgen05CommitOp::getIntrinsicIDAndArgs(
+ *op, moduleTranslation, args);
+ createIntrinsicCall(builder, id, args);
+ }];
+}
+
//===----------------------------------------------------------------------===//
// NVVM target attribute.
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
index 241b25c6caf128e..62f0c213381111c 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
@@ -1284,6 +1284,36 @@ llvm::Intrinsic::ID Tcgen05DeallocOp::getIntrinsicIDAndArgs(
return id;
}
+#define TCGEN05_COMMIT_IMPL(cg, is_shared, mc) \
+ is_shared ? llvm::Intrinsic::nvvm_tcgen05_commit##mc##_shared##_##cg \
+ : llvm::Intrinsic::nvvm_tcgen05_commit##mc##_##cg
+
+#define GET_TCGEN05_COMMIT_ID(cta_group, is_shared, has_mc) \
+ has_mc ? TCGEN05_COMMIT_IMPL(cta_group, is_shared, _mc) \
+ : TCGEN05_COMMIT_IMPL(cta_group, is_shared, )
+
+llvm::Intrinsic::ID
+Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op,
+ LLVM::ModuleTranslation &mt,
+ llvm::SmallVector<llvm::Value *> &args) {
+ auto curOp = cast<NVVM::Tcgen05CommitOp>(op);
+ unsigned AS = llvm::cast<LLVM::LLVMPointerType>(curOp.getAddr().getType())
+ .getAddressSpace();
+ bool isShared = AS == NVVMMemorySpace::kSharedMemorySpace;
+ bool hasMulticast = curOp.getMulticastMask() ? true : false;
+ bool is2CTAMode = curOp.getGroup() == Tcgen05GroupKind::CTA_2;
+
+ auto id = is2CTAMode ? GET_TCGEN05_COMMIT_ID(cg2, isShared, hasMulticast)
+ : GET_TCGEN05_COMMIT_ID(cg1, isShared, hasMulticast);
+
+ // Fill the Intrinsic Args
+ args.push_back(mt.lookupValue(curOp.getAddr()));
+ if (hasMulticast)
+ args.push_back(mt.lookupValue(curOp.getMulticastMask()));
+
+ return id;
+}
+
/// Infer the result ranges for the NVVM SpecialRangeableRegisterOp that might
/// have ConstantRangeAttr.
static void nvvmInferResultRanges(Operation *op, Value result,
diff --git a/mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir b/mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir
new file mode 100644
index 000000000000000..7536a4567e34e58
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir
@@ -0,0 +1,56 @@
+// RUN: mlir-opt -split-input-file -verify-diagnostics %s
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_fence
+llvm.func @llvm_nvvm_tcgen05_fence() {
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.before.thread.sync()
+ nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.after.thread.sync()
+ nvvm.tcgen05.fence #nvvm.tcgen05_fence<after>
+
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_wait
+llvm.func @llvm_nvvm_tcgen05_wait() {
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.ld()
+ nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.st()
+ nvvm.tcgen05.wait #nvvm.tcgen05_wait<store>
+
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_commit_generic
+llvm.func @llvm_nvvm_tcgen05_commit_generic(%barrier : !llvm.ptr, %cta_mask : i16) {
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.cg1(ptr %{{.*}})
+ nvvm.tcgen05.commit %barrier : !llvm.ptr
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.cg2(ptr %{{.*}})
+ nvvm.tcgen05.commit %barrier {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %{{.*}}, i16 %{{.*}})
+ nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask : !llvm.ptr, i16
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %{{.*}}, i16 %{{.*}})
+ nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr, i16
+ llvm.return
+}
+
+// CHECK-LABEL: @llvm_nvvm_tcgen05_commit_shared
+llvm.func @llvm_nvvm_tcgen05_commit_shared(%barrier : !llvm.ptr<3>, %cta_mask : i16) {
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %{{.*}})
+ nvvm.tcgen05.commit %barrier : !llvm.ptr<3>
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %{{.*}})
+ nvvm.tcgen05.commit %barrier {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr<3>
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %{{.*}}, i16 %{{.*}})
+ nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask : !llvm.ptr<3>, i16
+
+ // CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %{{.*}}, i16 %{{.*}})
+ nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr<3>, i16
+ llvm.return
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/126265
More information about the Mlir-commits
mailing list