[Mlir-commits] [mlir] [ROCDL] Added LDS barrier ops to ROCDL (gfx1250) (PR #171810)
Ravil Dorozhinskii
llvmlistbot at llvm.org
Fri Dec 12 03:04:12 PST 2025
https://github.com/ravil-mobile updated https://github.com/llvm/llvm-project/pull/171810
>From 810cdcafcc836d19a01c614227935517f98ccb36 Mon Sep 17 00:00:00 2001
From: ravil-mobile <ravil.aviva.com at gmail.com>
Date: Thu, 11 Dec 2025 11:37:00 +0000
Subject: [PATCH] [ROCDL] Added LDS barrier ops to ROCDL (gfx1250)
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 64 ++++++++++++++++++--
mlir/test/Dialect/LLVMIR/rocdl.mlir | 9 +++
mlir/test/Target/LLVMIR/rocdl.mlir | 9 +++
3 files changed, 78 insertions(+), 4 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 57cb98a1d9be7..0d9165bb7db5d 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -1177,25 +1177,81 @@ def ROCDL_RawBufferAtomicCmpSwap :
// Memory prefetch intrinsics
def ROCDL_GlobalPrefetchOp :
- ROCDL_IntrOp<"global.prefetch", [], [], [], 0, 0, 0, 0, [1], ["scope"]>,
- Arguments<(ins Arg<LLVM_PointerInAddressSpace<1>, "", []>:$ptr, I32Attr:$scope)> {
+ ROCDL_IntrOp<"global.prefetch", [], [], [], 0, 0, 1, 0, [1], ["scope"]> {
+ dag args = (ins Arg<LLVM_PointerInAddressSpace<1>, "", [MemRead]>:$ptr,
+ I32Attr:$scope);
+ let arguments = !con(args, baseArgs);
let description = [{
Prefetches 1 byte of data per lane from global memory into the WGP-cache or L2-cache.
Available on gfx1250+.
}];
let results = (outs);
let assemblyFormat = "$ptr `,` `scope` $scope attr-dict `:` qualified(type($ptr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
}
def ROCDL_FlatPrefetchOp :
- ROCDL_IntrOp<"flat.prefetch", [], [], [], 0, 0, 0, 0, [1], ["scope"]>,
- Arguments<(ins Arg<LLVM_PointerInAddressSpace<0>, "", []>:$ptr, I32Attr:$scope)> {
+ ROCDL_IntrOp<"flat.prefetch", [], [], [], 0, 0, 1, 0, [1], ["scope"]> {
+ dag args = (ins Arg<LLVM_PointerInAddressSpace<0>, "", [MemRead]>:$ptr,
+ I32Attr:$scope);
+ let arguments = !con(args, baseArgs);
let description = [{
Prefetches 1 byte of data per lane using flat-memory addresses into the WGP-cache or L2-cache.
Available on gfx1250+.
}];
let results = (outs);
let assemblyFormat = "$ptr `,` `scope` $scope attr-dict `:` qualified(type($ptr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
+}
+
+//===---------------------------------------------------------------------===//
+// Atomic barrier intrinsics (LDS memory barriers).
+
+def ROCDL_DsAtomicBarrierArriveRtnOp :
+ ROCDL_IntrOp<"ds.atomic.barrier.arrive.rtn.b64", [], [], [], 1, 0, 1, 0, [], []> {
+ dag args = (ins Arg<ROCDLBufferLDS, "", [MemRead, MemWrite]>:$barrierPtr,
+ I64:$val);
+ let arguments = !con(args, baseArgs);
+ let description = [{
+ Waits on a given DS barrier and decrements its pending count by a given value. Note, the barrier state
+ is given as a 64-bit structure containing pending count, phase and init count. The op returns the old
+ barrier state. The op is executed as an ordinary LDS operation and is ordered with other LDS operations.
+ Thus, check DSCNT to determine when this instruction has executed.
+ Available on gfx1250+.
+ }];
+ let results = (outs I64:$res);
+ let assemblyFormat = "$barrierPtr `,` $val attr-dict `:` qualified(type($barrierPtr)) `,` type($val) `->` type($res)";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getBarrierPtr()};
+ }
+ }];
+}
+
+def ROCDL_DsAtomicAsyncBarrierArriveOp :
+ ROCDL_IntrOp<"ds.atomic.async.barrier.arrive.b64", [], [], [], 0, 0, 1, 0, [], []> {
+ dag args = (ins Arg<ROCDLBufferLDS, "", [MemWrite]>:$barrierPtr);
+ let arguments = !con(args, baseArgs);
+ let description = [{
+ Waits on a given DS barrier and decrements its pending count by 1.
+ Stays in order with ASYNC loads to LDS, and uses ASYNCcnt to track its completion.
+ Available on gfx1250+.
+ }];
+ let results = (outs);
+ let assemblyFormat = "$barrierPtr attr-dict `:` qualified(type($barrierPtr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getBarrierPtr()};
+ }
+ }];
}
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index ae25b111ea325..ba029631a223d 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -892,6 +892,15 @@ llvm.func @rocdl.flat.prefetch(%ptr : !llvm.ptr) {
llvm.return
}
+llvm.func @rocdl.atomic.barriers.arrive(%ptr : !llvm.ptr<3>, %val : i64) {
+ // CHECK-LABEL: rocdl.atomic.barriers.arrive
+ // CHECK: rocdl.ds.atomic.async.barrier.arrive.b64 %{{.*}} : !llvm.ptr<3>
+ // CHECK: %{{.*}} = rocdl.ds.atomic.barrier.arrive.rtn.b64 %{{.*}}, %{{.*}} : !llvm.ptr<3>, i64 -> i64
+ rocdl.ds.atomic.async.barrier.arrive.b64 %ptr : !llvm.ptr<3>
+ %res = rocdl.ds.atomic.barrier.arrive.rtn.b64 %ptr, %val : !llvm.ptr<3>, i64 -> i64
+ llvm.return
+}
+
// -----
llvm.func @rocdl.raw.buffer.f32(%rsrc : vector<4xi32>,
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index 87faed16aa59c..cbd7e36b88cd2 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1355,6 +1355,15 @@ llvm.func @rocdl.flat.prefetch(%ptr : !llvm.ptr) {
llvm.return
}
+llvm.func @rocdl.atomic.barriers.arrive(%ptr : !llvm.ptr<3>, %val : i64) {
+ // CHECK-LABEL: rocdl.atomic.barriers.arrive
+ // CHECK: call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %{{.*}})
+ // CHECK: %{{.*}} = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %{{.*}}, i64 %{{.*}})
+ rocdl.ds.atomic.async.barrier.arrive.b64 %ptr : !llvm.ptr<3>
+ %res = rocdl.ds.atomic.barrier.arrive.rtn.b64 %ptr, %val : !llvm.ptr<3>, i64 -> i64
+ llvm.return
+}
+
llvm.func @rocdl.wmma.scale(%arg0: i32, %arg1: vector<4xf32>, %arg2: vector<8xi32>,
%arg3: vector<12xi32>, %arg5: vector<16xi32>,
%arg8: i64, %arg9: vector<8xf32>) -> vector<4xf32> {
More information about the Mlir-commits
mailing list