[Mlir-commits] [mlir] [mlir][ROCDL] Wrap asyncmark and wait.asyncmark intrinsics (PR #181054)

Krzysztof Drewniak llvmlistbot at llvm.org
Wed Feb 11 18:00:08 PST 2026


https://github.com/krzysz00 updated https://github.com/llvm/llvm-project/pull/181054

>From 131ac5c608f82af1ad1dc4df7ac9cae78cc36604 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Wed, 11 Feb 2026 23:48:11 +0000
Subject: [PATCH] [mlir][ROCDL] Wrap asyncmark and wait.asyncmark intrinsics

See the op-level and LLVM documentation for details (so that I'm not
repeating myself here). In short, these are the general operations for
compiler-operated tracking of asynchronous operations, which frees
programmers from having to deal with all the different counters, allows
certain optimizations, and doesn't require precise alias analysis.

-----

Co-authored-by: Claude Opus 4.5 <noreply at anthropic.com>
---
 mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 55 ++++++++++++++++++++
 mlir/test/Dialect/LLVMIR/rocdl.mlir          | 14 +++++
 mlir/test/Target/LLVMIR/rocdl.mlir           | 14 +++++
 3 files changed, 83 insertions(+)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 6a874aafdec38..c3af1bd32ebda 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -590,6 +590,61 @@ def ROCDL_WaitTensorcntOp: ROCDL_ConcreteNonMemIntrOp<"s.wait.tensorcnt", [], 0,
   let assemblyFormat = "$count attr-dict";
 }
 
+def ROCDL_AsyncmarkOp : ROCDL_ConcreteNonMemIntrOp<"asyncmark", [], 0>,
+    Arguments<(ins)> {
+  let summary = "Mark the end of a group of asynchronous operations";
+  let description = [{
+      This operation, in conjunction with `rocdl.wait.asyncmark`, forms the
+      compiler-provided framework for tracking explicitly asynchronous
+      memory operations, such as copies to LDS that use async intrinsics
+      and gfx1250's tensor loads.
+
+      Details of its behavior can be found in
+      [the LLVM documentation on async tracking](/llvm/docs/AMDGPUAsyncOperations.rst).
+
+      See `rocdl.wait.asyncmark`'s documentation for a usage example.
+
+      Available on gfx9 and later.
+  }];
+  let results = (outs);
+  let assemblyFormat = "attr-dict";
+}
+
+def ROCDL_WaitAsyncmarkOp: ROCDL_ConcreteNonMemIntrOp<"wait.asyncmark", [], 0, [0], ["count"]>,
+    Arguments<(ins I16Attr:$count)> {
+  let summary = "Wait until N or fewer async operation groups are unexecuted";
+  let description = [{
+      This operation, along with `rocdl.asyncmark`, forms the compiler-provided
+      framework for explicitly tracking asynchronous operations.
+
+      At the point where a wait.asyncmark operation is executed, all async operations
+      that belong to any async group (established by asyncmark in program order)
+      other than the `count` most recently established groups will have finished executing.
+
+      For more detail, including on how this mechanism composes with function calls,
+      see [the LLVM documentation on async tracking](/llvm/docs/AMDGPUAsyncOperations.rst).
+
+      Available on gfx9 and later.
+
+      Example:
+      ```mlir
+      rocdl.tensor.load.to.lds ...
+      rocdl.global.async.load.to.lds ...
+
+      rocdl.asyncmark
+
+      rocdl.tensor.load.to.lds ...
+      rocdl.global.async.load.to.lds ...
+
+      rocdl.asyncmark
+
+      rocdl.wait.asyncmark 1 // First group of loads completes after this
+      ```
+  }];
+  let results = (outs);
+  let assemblyFormat = "$count attr-dict";
+}
+
 def ROCDL_SetPrioOp : ROCDL_ConcreteNonMemIntrOp<"s.setprio", [], 0, [0], ["priority"]>,
   Arguments<(ins I16Attr:$priority)> {
   let assemblyFormat = "$priority attr-dict";
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index ca599e8025348..2adb5bc90915a 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -1256,6 +1256,20 @@ llvm.func @rocdl.s.wait.tensorcnt() {
   llvm.return
 }
 
+llvm.func @rocdl.asyncmark() {
+  // CHECK-LABEL: rocdl.asyncmark
+  // CHECK: rocdl.asyncmark
+  rocdl.asyncmark
+  llvm.return
+}
+
+llvm.func @rocdl.wait.asyncmark() {
+  // CHECK-LABEL: rocdl.wait.asyncmark
+  // CHECK: rocdl.wait.asyncmark 0
+  rocdl.wait.asyncmark 0
+  llvm.return
+}
+
 // -----
 
 llvm.func @rocdl.readfirstlane(%src : f32) -> f32 {
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 382bc0b9f8ff6..7a7e76410e4d2 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -391,6 +391,20 @@ llvm.func @rocdl.s.wait.tensorcnt() {
   llvm.return
 }
 
+llvm.func @rocdl.asyncmark() {
+  // CHECK-LABEL: rocdl.asyncmark
+  // CHECK-NEXT: call void @llvm.amdgcn.asyncmark()
+  rocdl.asyncmark
+  llvm.return
+}
+
+llvm.func @rocdl.wait.asyncmark() {
+  // CHECK-LABEL: rocdl.wait.asyncmark
+  // CHECK-NEXT: call void @llvm.amdgcn.wait.asyncmark(i16 0)
+  rocdl.wait.asyncmark 0
+  llvm.return
+}
+
 llvm.func @rocdl.setprio() {
   // CHECK: call void @llvm.amdgcn.s.setprio(i16 0)
   rocdl.s.setprio 0



More information about the Mlir-commits mailing list