[Mlir-commits] [mlir] f88d060 - [mlir][amdgpu] `memory_counter_wait` tensor counter support (#171153)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Mon Dec 8 09:02:44 PST 2025
Author: Ivan Butygin
Date: 2025-12-08T20:02:40+03:00
New Revision: f88d060c4176d17df56587a083944637ca865cb3
URL: https://github.com/llvm/llvm-project/commit/f88d060c4176d17df56587a083944637ca865cb3
DIFF: https://github.com/llvm/llvm-project/commit/f88d060c4176d17df56587a083944637ca865cb3.diff
LOG: [mlir][amdgpu] `memory_counter_wait` tensor counter support (#171153)
Added:
mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_tensor.mlir
mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_unsupported.mlir
Modified:
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait.mlir
mlir/test/Dialect/AMDGPU/canonicalize.mlir
mlir/test/Dialect/AMDGPU/ops.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index ba078f52d24f6..56160d3e8fe85 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -906,7 +906,8 @@ def AMDGPU_MemoryCounterWaitOp :
OptionalAttr<I32Attr>:$load,
OptionalAttr<I32Attr>:$store,
OptionalAttr<I32Attr>:$ds,
- OptionalAttr<I32Attr>:$exp
+ OptionalAttr<I32Attr>:$exp,
+ OptionalAttr<I32Attr>:$tensor
)>
{
let summary = "Wait for specified hardware counters";
@@ -919,7 +920,7 @@ def AMDGPU_MemoryCounterWaitOp :
counters into one.
}];
let assemblyFormat = [{
- oilist( `load` `(` $load `)` | `store` `(` $store `)` | `ds` `(` $ds `)` | `exp` `(` $exp `)` ) attr-dict
+ oilist( `load` `(` $load `)` | `store` `(` $store `)` | `ds` `(` $ds `)` | `exp` `(` $exp `)` | `tensor` `(` $tensor `)` ) attr-dict
}];
let hasCanonicalizer = 1;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index f3b0da0120998..7584b17075225 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -506,10 +506,16 @@ struct MemoryCounterWaitOpLowering
if (std::optional<int> exp = adaptor.getExp())
ROCDL::WaitExpcntOp::create(rewriter, loc, *exp);
+ if (std::optional<int> tensor = adaptor.getTensor())
+ ROCDL::WaitTensorcntOp::create(rewriter, loc, *tensor);
+
rewriter.eraseOp(op);
return success();
}
+ if (adaptor.getTensor())
+ return op.emitOpError("unsupported chipset");
+
auto getVal = [](Attribute attr) -> unsigned {
if (attr)
return cast<IntegerAttr>(attr).getInt();
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 4a85db3ecf6f8..b7a665b0f5367 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -614,10 +614,12 @@ struct FuseMemoryCounterWaitOp final : OpRewritePattern<MemoryCounterWaitOp> {
auto setters = {&MemoryCounterWaitOp::setLoad,
&MemoryCounterWaitOp::setStore, &MemoryCounterWaitOp::setDs,
- &MemoryCounterWaitOp::setExp};
- auto lhsVals = {op.getLoad(), op.getStore(), op.getDs(), op.getExp()};
+ &MemoryCounterWaitOp::setExp,
+ &MemoryCounterWaitOp::setTensor};
+ auto lhsVals = {op.getLoad(), op.getStore(), op.getDs(), op.getExp(),
+ op.getTensor()};
auto rhsVals = {next.getLoad(), next.getStore(), next.getDs(),
- next.getExp()};
+ next.getExp(), next.getTensor()};
rewriter.modifyOpInPlace(op, [&] {
for (auto [setter, lhs, rhs] :
llvm::zip_equal(setters, lhsVals, rhsVals)) {
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait.mlir b/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait.mlir
index 1016ee859e462..537ef59b503a6 100644
--- a/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait.mlir
+++ b/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait.mlir
@@ -1,7 +1,7 @@
-// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s --check-prefixes=CHECK,GFX9
-// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10
-// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11
-// RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12
+// RUN: mlir-opt %s --convert-amdgpu-to-rocdl=chipset=gfx942 | FileCheck %s --check-prefixes=CHECK,GFX9
+// RUN: mlir-opt %s --convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefixes=CHECK,GFX10
+// RUN: mlir-opt %s --convert-amdgpu-to-rocdl=chipset=gfx1100 | FileCheck %s --check-prefixes=CHECK,GFX11
+// RUN: mlir-opt %s --convert-amdgpu-to-rocdl=chipset=gfx1201 | FileCheck %s --check-prefixes=CHECK,GFX12
// CHECK-LABEL: func @memory_counter_wait
func.func @memory_counter_wait() {
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_tensor.mlir b/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_tensor.mlir
new file mode 100644
index 0000000000000..5b29e01abebdb
--- /dev/null
+++ b/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_tensor.mlir
@@ -0,0 +1,9 @@
+// RUN: mlir-opt %s --convert-amdgpu-to-rocdl=chipset=gfx1250 | FileCheck %s
+
+// CHECK-LABEL: func @memory_counter_wait_tensor
+func.func @memory_counter_wait_tensor() {
+ // CHECK: rocdl.s.wait.tensorcnt 3
+ amdgpu.memory_counter_wait tensor(3)
+
+ return
+}
diff --git a/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_unsupported.mlir b/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_unsupported.mlir
new file mode 100644
index 0000000000000..1d2f692bee488
--- /dev/null
+++ b/mlir/test/Conversion/AMDGPUToROCDL/memory_counter_wait_unsupported.mlir
@@ -0,0 +1,11 @@
+// RUN: mlir-opt %s --verify-diagnostics --convert-amdgpu-to-rocdl=chipset=gfx942
+// RUN: mlir-opt %s --verify-diagnostics --convert-amdgpu-to-rocdl=chipset=gfx1030
+// RUN: mlir-opt %s --verify-diagnostics --convert-amdgpu-to-rocdl=chipset=gfx1100
+
+func.func @memory_counter_wait_tensor() {
+ // expected-error @below{{failed to legalize operation 'amdgpu.memory_counter_wait'}}
+ // expected-error @below{{'amdgpu.memory_counter_wait' op unsupported chipset}}
+ amdgpu.memory_counter_wait tensor(0)
+
+ return
+}
diff --git a/mlir/test/Dialect/AMDGPU/canonicalize.mlir b/mlir/test/Dialect/AMDGPU/canonicalize.mlir
index c66e9ed5d6f6d..cff1d3f2ac1fd 100644
--- a/mlir/test/Dialect/AMDGPU/canonicalize.mlir
+++ b/mlir/test/Dialect/AMDGPU/canonicalize.mlir
@@ -250,10 +250,10 @@ func.func @scaled_mfma_ugly_shapes(%opA: vector<32xf4E2M1FN>, %opB: vector<32xf4
// CHECK-LABEL fuse_memory_counter_wait
func.func @fuse_memory_counter_wait() {
// CHECK: amdgpu.memory_counter_wait
- // CHECK-SAME: load(1) store(2) ds(2) exp(1)
+ // CHECK-SAME: load(1) store(2) ds(2) exp(1) tensor(0)
// CHECK-NEXT: return
- amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4)
- amdgpu.memory_counter_wait load(4) store(3) ds(2) exp(1)
+ amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4) tensor(5)
+ amdgpu.memory_counter_wait load(4) store(3) ds(2) exp(1) tensor(0)
return
}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index cf3f7a9cb08a2..651aff4a0d22a 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -671,18 +671,20 @@ func.func @gather_to_lds(%idx1 : index, %idx2 : index, %mem1 : memref<32xf16>, %
// CHECK-LABEL: func @memory_counter_wait
func.func @memory_counter_wait() {
- // CHECK: amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4)
- // CHECK: amdgpu.memory_counter_wait load(4) store(2) ds(3) exp(1)
+ // CHECK: amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4) tensor(5)
+ // CHECK: amdgpu.memory_counter_wait load(4) store(2) ds(3) exp(1) tensor(0)
// CHECK: amdgpu.memory_counter_wait load(1)
// CHECK: amdgpu.memory_counter_wait store(2)
// CHECK: amdgpu.memory_counter_wait ds(3)
// CHECK: amdgpu.memory_counter_wait exp(4)
- amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4)
- amdgpu.memory_counter_wait exp(1) store(2) ds(3) load(4)
+ // CHECK: amdgpu.memory_counter_wait tensor(5)
+ amdgpu.memory_counter_wait load(1) store(2) ds(3) exp(4) tensor(5)
+ amdgpu.memory_counter_wait tensor(0) exp(1) store(2) ds(3) load(4)
amdgpu.memory_counter_wait load(1)
amdgpu.memory_counter_wait store(2)
amdgpu.memory_counter_wait ds(3)
amdgpu.memory_counter_wait exp(4)
+ amdgpu.memory_counter_wait tensor(5)
func.return
}
More information about the Mlir-commits
mailing list