[Mlir-commits] [mlir] 3ae5f27 - [ROCDL] Added LDS barrier ops to ROCDL (gfx1250) (#171810)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Fri Dec 12 07:28:03 PST 2025
Author: Ravil Dorozhinskii
Date: 2025-12-12T16:27:59+01:00
New Revision: 3ae5f2782e3cea9f0b19b86cf0b928e6e0adedf0
URL: https://github.com/llvm/llvm-project/commit/3ae5f2782e3cea9f0b19b86cf0b928e6e0adedf0
DIFF: https://github.com/llvm/llvm-project/commit/3ae5f2782e3cea9f0b19b86cf0b928e6e0adedf0.diff
LOG: [ROCDL] Added LDS barrier ops to ROCDL (gfx1250) (#171810)
Added `ds.atomic.barrier.arrive.rtn.b64` and
`ds.atomic.async.barrier.arrive.b64` to ROCDL. These ops are part of the
LDS memory barrier concept on gfx1250. Also added alias analysis support to
the `global` and `flat` data prefetch ops, and extended the ROCDL tests.
Added:
Modified:
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
mlir/test/Dialect/LLVMIR/rocdl.mlir
mlir/test/Target/LLVMIR/rocdl.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 426a100dfca02..99cc6da0ec304 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -1192,25 +1192,81 @@ def ROCDL_RawBufferAtomicCmpSwap :
// Memory prefetch intrinsics
def ROCDL_GlobalPrefetchOp :
- ROCDL_IntrOp<"global.prefetch", [], [], [], 0, 0, 0, 0, [1], ["scope"]>,
- Arguments<(ins Arg<LLVM_PointerInAddressSpace<1>, "", []>:$ptr, I32Attr:$scope)> {
+ ROCDL_IntrOp<"global.prefetch", [], [], [], 0, 0, 1, 0, [1], ["scope"]> {
+ dag args = (ins Arg<LLVM_PointerInAddressSpace<1>, "", [MemRead]>:$ptr,
+ I32Attr:$scope);
+ let arguments = !con(args, baseArgs);
let description = [{
Prefetches 1 byte of data per lane from global memory into the WGP-cache or L2-cache.
Available on gfx1250+.
}];
let results = (outs);
let assemblyFormat = "$ptr `,` `scope` $scope attr-dict `:` qualified(type($ptr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
}
def ROCDL_FlatPrefetchOp :
- ROCDL_IntrOp<"flat.prefetch", [], [], [], 0, 0, 0, 0, [1], ["scope"]>,
- Arguments<(ins Arg<LLVM_PointerInAddressSpace<0>, "", []>:$ptr, I32Attr:$scope)> {
+ ROCDL_IntrOp<"flat.prefetch", [], [], [], 0, 0, 1, 0, [1], ["scope"]> {
+ dag args = (ins Arg<LLVM_PointerInAddressSpace<0>, "", [MemRead]>:$ptr,
+ I32Attr:$scope);
+ let arguments = !con(args, baseArgs);
let description = [{
Prefetches 1 byte of data per lane using flat-memory addresses into the WGP-cache or L2-cache.
Available on gfx1250+.
}];
let results = (outs);
let assemblyFormat = "$ptr `,` `scope` $scope attr-dict `:` qualified(type($ptr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
+}
+
+//===---------------------------------------------------------------------===//
+// Atomic barrier intrinsic (LDS memory barriers).
+
+def ROCDL_DsAtomicBarrierArriveRtnOp :
+ ROCDL_IntrOp<"ds.atomic.barrier.arrive.rtn.b64", [], [], [], 1, 0, 1, 0, [], []> {
+ dag args = (ins Arg<ROCDLBufferLDS, "", [MemRead, MemWrite]>:$barrierPtr,
+ I64:$val);
+ let arguments = !con(args, baseArgs);
+ let description = [{
+ Waits on a given DS barrier and decrements its pending count by a given value. Note that the barrier state
+ is given as a 64-bit structure containing the pending count, phase, and init count. The op returns the old
+ barrier state. The op is executed as an ordinary LDS operation and is ordered with other LDS operations.
+ Thus, check DSCNT to determine when this instruction has executed.
+ Available on gfx1250+.
+ }];
+ let results = (outs I64:$res);
+ let assemblyFormat = "$barrierPtr `,` $val attr-dict `:` qualified(type($barrierPtr)) `,` type($val) `->` type($res)";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getBarrierPtr()};
+ }
+ }];
+}
+
+def ROCDL_DsAtomicAsyncBarrierArriveOp :
+ ROCDL_IntrOp<"ds.atomic.async.barrier.arrive.b64", [], [], [], 0, 0, 1, 0, [], []> {
+ dag args = (ins Arg<ROCDLBufferLDS, "", [MemWrite]>:$barrierPtr);
+ let arguments = !con(args, baseArgs);
+ let description = [{
+ Waits on a given DS barrier and decrements its pending count by 1.
+ Stays in order with ASYNC loads to LDS, and uses ASYNCcnt to track its completion.
+ Available on gfx1250+.
+ }];
+ let results = (outs);
+ let assemblyFormat = "$barrierPtr attr-dict `:` qualified(type($barrierPtr))";
+ let extraClassDefinition = [{
+ SmallVector<Value> $cppClass::getAccessedOperands() {
+ return {getBarrierPtr()};
+ }
+ }];
}
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 2bab8e2079ec9..3ddadde944965 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -966,6 +966,15 @@ llvm.func @rocdl.flat.prefetch(%ptr : !llvm.ptr) {
llvm.return
}
+llvm.func @rocdl.atomic.barriers.arrive(%ptr : !llvm.ptr<3>, %val : i64) {
+ // CHECK-LABEL: rocdl.atomic.barriers.arrive
+ // CHECK: rocdl.ds.atomic.async.barrier.arrive.b64 %{{.*}} : !llvm.ptr<3>
+ // CHECK: %{{.*}} = rocdl.ds.atomic.barrier.arrive.rtn.b64 %{{.*}}, %{{.*}} : !llvm.ptr<3>, i64 -> i64
+ rocdl.ds.atomic.async.barrier.arrive.b64 %ptr : !llvm.ptr<3>
+ %res = rocdl.ds.atomic.barrier.arrive.rtn.b64 %ptr, %val : !llvm.ptr<3>, i64 -> i64
+ llvm.return
+}
+
// -----
llvm.func @rocdl.raw.buffer.f32(%rsrc : vector<4xi32>,
diff --git a/mlir/test/Target/LLVMIR/rocdl.mlir b/mlir/test/Target/LLVMIR/rocdl.mlir
index e074ae6dfdd51..5e1d857bd2a5d 100644
--- a/mlir/test/Target/LLVMIR/rocdl.mlir
+++ b/mlir/test/Target/LLVMIR/rocdl.mlir
@@ -1428,6 +1428,15 @@ llvm.func @rocdl.flat.prefetch(%ptr : !llvm.ptr) {
llvm.return
}
+llvm.func @rocdl.atomic.barriers.arrive(%ptr : !llvm.ptr<3>, %val : i64) {
+ // CHECK-LABEL: rocdl.atomic.barriers.arrive
+ // CHECK: call void @llvm.amdgcn.ds.atomic.async.barrier.arrive.b64(ptr addrspace(3) %{{.*}})
+ // CHECK: %{{.*}} = call i64 @llvm.amdgcn.ds.atomic.barrier.arrive.rtn.b64(ptr addrspace(3) %{{.*}}, i64 %{{.*}})
+ rocdl.ds.atomic.async.barrier.arrive.b64 %ptr : !llvm.ptr<3>
+ %res = rocdl.ds.atomic.barrier.arrive.rtn.b64 %ptr, %val : !llvm.ptr<3>, i64 -> i64
+ llvm.return
+}
+
llvm.func @rocdl.wmma.scale(%arg0: i32, %arg1: vector<4xf32>, %arg2: vector<8xi32>,
%arg3: vector<12xi32>, %arg5: vector<16xi32>,
%arg8: i64, %arg9: vector<8xf32>) -> vector<4xf32> {
More information about the Mlir-commits mailing list