[Mlir-commits] [mlir] [mlir][ROCDL] Wrap asyncmark and wait.asyncmark intrinsics (PR #181054)
Krzysztof Drewniak
llvmlistbot at llvm.org
Wed Feb 11 18:00:08 PST 2026
https://github.com/krzysz00 updated https://github.com/llvm/llvm-project/pull/181054
>From 131ac5c608f82af1ad1dc4df7ac9cae78cc36604 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Wed, 11 Feb 2026 23:48:11 +0000
Subject: [PATCH] [mlir][ROCDL] Wrap asyncmark and wait.asyncmark intrinsics
(see op-level and LLVM documentation for details so I'm not repeating
myself, but these are the general operations for compiler-operated
asynchronous operation tracking, which frees programmers from having
to deal with all the different counters, allows certain optimizations,
and doesn't require precise alias analysis)
-----
Co-authored-by: Claude Opus 4.5 <noreply at anthropic.com>
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 55 ++++++++++++++++++++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 14 +++++
mlir/test/Target/LLVMIR/rocdl.mlir | 14 +++++
3 files changed, 83 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 6a874aafdec38..c3af1bd32ebda 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -590,6 +590,61 @@ def ROCDL_WaitTensorcntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.tensorcnt", [], 0,
let assemblyFormat = "$count attr-dict";
}
+def ROCDL_AsyncmarkOp : ROCDL_ConcreteNonMemIntrOp<"asyncmark", [], 0>,
+ Arguments<(ins)> {
+ let summary = "Mark the end of a group of asynchronous operations";
+ let description = [{
+ This operation, in conjunction with `rocdl.wait.asyncmark`, forms the
+ compiler-provided framework for tracking explicitly asynchronous
+ memory operations, such as copies to LDS that use async intrinsics
+ and gfx1250's tensor loads.
+
+ Details of its behavior can be found in
+ [the LLVM documentation on async tracking](/llvm/docs/AMDGPUAsyncOperations.rst).
+
+ See `rocdl.wait.asyncmark`'s documentation for a usage example.
+
+ Available on gfx9 and later.
+ }];
+ let results = (outs);
+ let assemblyFormat = "attr-dict";
+}
+
+def ROCDL_WaitAsyncmarkOp: ROCDL_ConcreteNonMemIntrOp<"wait.asyncmark", [], 0, [0], ["count"]>,
+ Arguments<(ins I16Attr:$count)> {
+ let summary = "Wait until at most `count` async operation groups remain incomplete";
+ let description = [{
+ This operation, along with `rocdl.asyncmark`, forms the compiler-provided
+ framework for explicitly tracking asynchronous operations.
+
+ Once a `rocdl.wait.asyncmark` operation has completed, every async operation
+ that belongs to an async group (as established by `rocdl.asyncmark` in program
+ order), except for the `count` most recently established groups, has finished executing.
+
+ For more detail, including on how this mechanism composes with function calls,
+ see [the LLVM documentation on async tracking](/llvm/docs/AMDGPUAsyncOperations.rst).
+
+ Available on gfx9 and later.
+
+ Example:
+ ```mlir
+ rocdl.tensor.load.to.lds ...
+ rocdl.global.async.load.to.lds ...
+
+ rocdl.asyncmark
+
+ rocdl.tensor.load.to.lds ...
+ rocdl.global.async.load.to.lds ...
+
+ rocdl.asyncmark
+
+ rocdl.wait.asyncmark 1 // The first group of loads has completed once this wait returns
+ ```
+ }];
+ let results = (outs);
+ let assemblyFormat = "$count attr-dict";
+}
+
def ROCDL_SetPrioOp : ROCDL_ConcreteNonMemIntrOp<"s.setprio", [], 0, [0], ["priority"]>,
Arguments<(ins I16Attr:$priority)> {
let assemblyFormat = "$priority attr-dict";
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index ca599e8025348..2adb5bc90915a 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -1256,6 +1256,20 @@ llvm.func @rocdl.s.wait.tensorcnt() {
llvm.return
}
+llvm.func @rocdl.asyncmark() {
+ // CHECK-LABEL: rocdl.asyncmark
+ // CHECK: rocdl.asyncmark
+ rocdl.asyncmark
+ llvm.return
+}
+
+llvm.func @rocdl.wait.asyncmark() {
+ // CHECK-LABEL: rocdl.wait.asyncmark
+ // CHECK: rocdl.wait.asyncmark 0
+ rocdl.wait.asyncmark 0
+ llvm.return
+}
+
// -----
llvm.func @rocdl.readfirstlane(%src : f32) -> f32 {
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 382bc0b9f8ff6..7a7e76410e4d2 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -391,6 +391,20 @@ llvm.func @rocdl.s.wait.tensorcnt() {
llvm.return
}
+llvm.func @rocdl.asyncmark() {
+ // CHECK-LABEL: rocdl.asyncmark
+ // CHECK-NEXT: call void @llvm.amdgcn.asyncmark()
+ rocdl.asyncmark
+ llvm.return
+}
+
+llvm.func @rocdl.wait.asyncmark() {
+ // CHECK-LABEL: rocdl.wait.asyncmark
+ // CHECK-NEXT: call void @llvm.amdgcn.wait.asyncmark(i16 0)
+ rocdl.wait.asyncmark 0
+ llvm.return
+}
+
llvm.func @rocdl.setprio() {
// CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
rocdl.s.setprio 0
More information about the Mlir-commits
mailing list