[Mlir-commits] [mlir] [mlir][amdgpu] Add amdgpu.make_dma_descriptor (PR #169407)
Erick Ochoa Lopez
llvmlistbot at llvm.org
Mon Nov 24 14:53:24 PST 2025
https://github.com/amd-eochoalo updated https://github.com/llvm/llvm-project/pull/169407
>From a4a1a59d894aae479a1bd5aebe2705431b6588b5 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Fri, 21 Nov 2025 12:56:29 -0500
Subject: [PATCH 01/13] [mlir][amdgpu] Add make_dma_base operation
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 55 +++++++++++++++++++
.../mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h | 4 ++
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 7 +++
mlir/test/Dialect/AMDGPU/ops.mlir | 12 ++++
4 files changed, 78 insertions(+)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 4820b7a747ac2..04043f47c3539 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -33,6 +33,7 @@ def AMDGPU_Dialect : Dialect {
"gpu::GPUDialect"
];
let useDefaultAttributePrinterParser = 1;
+ let useDefaultTypePrinterParser = 1;
}
def AnyIntegerOrFloat : AnyTypeOf<[AnySignlessInteger, AnyFloat], "Integer or Float">;
@@ -79,6 +80,36 @@ def AMDGPU_AddressSpaceAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_AddressSpace,
let assemblyFormat = "`<` $value `>`";
}
+class AMDGPU_Type<string name, string typeMnemonic, list<Trait> traits = []>
+ : TypeDef<AMDGPU_Dialect, name, traits> {
+ let mnemonic = typeMnemonic;
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Type definitions
+//===----------------------------------------------------------------------===//
+
+def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
+ // TODO:
+ // * Add verifiers such that one of the memrefs is from LDS and the other global.
+ // * Add verifiers to make sure that the type is in the correct direction.
+ // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions.
+
+ let summary = "Pair of base addresses that move data between LDS and global storage.";
+ let description = [{
+ This type is opaque and it is used to represent a struct of two addresses.
+ One address is in LDS while the other is in global memory.
+ }];
+ let parameters = (ins "Type":$elementType);
+ let builders = [
+ TypeBuilderWithInferredContext<(ins "Type":$elementType), [{
+ return $_get(elementType.getContext(), elementType);
+ }]>
+ ];
+ let assemblyFormat = "`<` $elementType `>`";
+
+}
+
//===----------------------------------------------------------------------===//
// AMDGPU Op definitions
//===----------------------------------------------------------------------===//
@@ -1192,4 +1223,28 @@ def AMDGPU_ScaledMFMAOp :
}];
let hasCanonicalizer = 1;
}
+
+def AMDGPU_MakeDmaBaseOp :
+ AMDGPU_Op<"make_dma_base", [AttrSizedOperandSegments]>,
+ Arguments<(ins
+ Arg<AnyMemRef, "buffer to read from", [MemRead]>:$src,
+ Variadic<Index>:$srcIndices,
+ Arg<AnyMemRef, "buffer to write to", [MemWrite]>:$dst,
+ Variadic<Index>:$dstIndices)>,
+ Results<(outs AMDGPU_TDMBaseType: $base)> {
+
+ let summary = "Pair of based addresses used when moving tiles between LDS and global memory.";
+ let description = [{
+ This operation creates a pair of addresses that will be used by tensor_load_to_lds
+ and tensor_store_from_lds.
+
+ This operation creates a value corresponding roughly to the descriptor group 0
+ found in TensorLoadToLDSOp and TensorStoreFromLDSOp in the rocdl dialect.
+ }];
+
+ let assemblyFormat = [{
+ $src `[` $srcIndices `]` `,` $dst `[` $dstIndices `]` attr-dict `:` type($src) `,` type($dst) `to` type(results)
+ }];
+}
+
#endif // AMDGPU
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h
index dcd9f95a7561f..a7680fb5c3191 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h
@@ -25,6 +25,7 @@
#include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h.inc"
#include "mlir/Dialect/AMDGPU/IR/AMDGPUEnums.h.inc"
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.h.inc"
namespace mlir::amdgpu {
/// Parser for the `custom<MNKDimensionList>` custom assembly format used by
@@ -52,6 +53,9 @@ inline void printMNKDimensionList(OpAsmPrinter &printer, Operation *,
#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.h.inc"
+#define GET_TYPEDEF_CLASSES
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.h.inc"
+
#define GET_OP_CLASSES
#include "mlir/Dialect/AMDGPU/IR/AMDGPU.h.inc"
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index d55f3cec47c1f..cdc10c60a42ae 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -55,6 +55,10 @@ void AMDGPUDialect::initialize() {
#define GET_OP_LIST
#include "mlir/Dialect/AMDGPU/IR/AMDGPU.cpp.inc"
>();
+ addTypes<
+#define GET_TYPEDEF_LIST
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.cpp.inc"
+ >();
addAttributes<
#define GET_ATTRDEF_LIST
#include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.cpp.inc"
@@ -839,5 +843,8 @@ void ScaledMFMAOp::getCanonicalizationPatterns(RewritePatternSet &results,
#define GET_ATTRDEF_CLASSES
#include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.cpp.inc"
+#define GET_TYPEDEF_CLASSES
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.cpp.inc"
+
#define GET_OP_CLASSES
#include "mlir/Dialect/AMDGPU/IR/AMDGPU.cpp.inc"
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 09134cb4704bb..653f9f64d24f4 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -685,3 +685,15 @@ func.func @memory_counter_wait() {
amdgpu.memory_counter_wait exp(4)
func.return
}
+
+// CHECK-LABEL: func @make_dma_base
+// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32>, %[[SMEM:.+]]: memref<8xi32, #gpu.address_space<workgroup>>)
+func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32, #gpu.address_space<workgroup>>) {
+ // CHECK: amdgpu.make_dma_base %[[MEM]][%[[IDX]]], %[[SMEM]][%[[IDX]]] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> to !amdgpu.tdm_base<i32>
+ amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> to !amdgpu.tdm_base<i32>
+
+ // CHECK: amdgpu.make_dma_base %[[SMEM]][%[[IDX]]], %[[MEM]][%[[IDX]]] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> to !amdgpu.tdm_base<i32>
+ amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> to !amdgpu.tdm_base<i32>
+ func.return
+}
+
>From d14f3e28cc79774adb744ae6ee6d98684f120fa7 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 09:18:26 -0500
Subject: [PATCH 02/13] Remove MemRead and MemWrite from operation
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 04043f47c3539..990d377dc9d7b 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1227,9 +1227,9 @@ def AMDGPU_ScaledMFMAOp :
def AMDGPU_MakeDmaBaseOp :
AMDGPU_Op<"make_dma_base", [AttrSizedOperandSegments]>,
Arguments<(ins
- Arg<AnyMemRef, "buffer to read from", [MemRead]>:$src,
+ Arg<AnyMemRef, "buffer to read from">:$src,
Variadic<Index>:$srcIndices,
- Arg<AnyMemRef, "buffer to write to", [MemWrite]>:$dst,
+ Arg<AnyMemRef, "buffer to write to">:$dst,
Variadic<Index>:$dstIndices)>,
Results<(outs AMDGPU_TDMBaseType: $base)> {
>From d3ca18c937218a8f115e58b0a6d4d5b10bdc187a Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 09:20:38 -0500
Subject: [PATCH 03/13] Add Pure to make_dma_base
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 990d377dc9d7b..645fc4655025a 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1225,7 +1225,7 @@ def AMDGPU_ScaledMFMAOp :
}
def AMDGPU_MakeDmaBaseOp :
- AMDGPU_Op<"make_dma_base", [AttrSizedOperandSegments]>,
+ AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
Arguments<(ins
Arg<AnyMemRef, "buffer to read from">:$src,
Variadic<Index>:$srcIndices,
>From 76e47f147ea84ec13d0a0afac5d5d2b963b9b49f Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 12:11:43 -0500
Subject: [PATCH 04/13] Add DynamicIndexList
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 44 +++++++++++++++----
mlir/test/Dialect/AMDGPU/ops.mlir | 8 ++++
2 files changed, 43 insertions(+), 9 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 645fc4655025a..e2fd78dab7ebf 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -80,21 +80,17 @@ def AMDGPU_AddressSpaceAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_AddressSpace,
let assemblyFormat = "`<` $value `>`";
}
-class AMDGPU_Type<string name, string typeMnemonic, list<Trait> traits = []>
- : TypeDef<AMDGPU_Dialect, name, traits> {
- let mnemonic = typeMnemonic;
-}
//===----------------------------------------------------------------------===//
// AMDGPU Type definitions
//===----------------------------------------------------------------------===//
-def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
- // TODO:
- // * Add verifiers such that one of the memrefs is from LDS and the other global.
- // * Add verifiers to make sure that the type is in the correct direction.
- // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions.
+class AMDGPU_Type<string name, string typeMnemonic, list<Trait> traits = []>
+ : TypeDef<AMDGPU_Dialect, name, traits> {
+ let mnemonic = typeMnemonic;
+}
+def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
let summary = "Pair of base addresses that move data between LDS and global storage.";
let description = [{
This type is opaque and it is used to represent a struct of two addresses.
@@ -107,6 +103,14 @@ def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
}]>
];
let assemblyFormat = "`<` $elementType `>`";
+}
+
+def AMDGPU_TDMDescriptorType : AMDGPU_Type<"TDMDescriptor", "tdm_descriptor"> {
+ let summary = "Descriptors used in tensor store/load operations.";
+ let description = [{
+ This type is opaque and corresponds to the two or four descriptor groups
+ used in tensor_load_to_lds or tensor_store_from_lds.
+ }];
}
@@ -1233,6 +1237,10 @@ def AMDGPU_MakeDmaBaseOp :
Variadic<Index>:$dstIndices)>,
Results<(outs AMDGPU_TDMBaseType: $base)> {
+ // TODO:
+ // * Add verifiers such that one of the memrefs is from LDS and the other global.
+ // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions.
+
let summary = "Pair of based addresses used when moving tiles between LDS and global memory.";
let description = [{
This operation creates a pair of addresses that will be used by tensor_load_to_lds
@@ -1247,4 +1255,22 @@ def AMDGPU_MakeDmaBaseOp :
}];
}
+def AMDGPU_MakeDmaDescriptorOp :
+ AMDGPU_Op<"make_dma_descriptor", [Pure]>,
+ Arguments<(ins
+ AMDGPU_TDMBaseType: $base,
+ Variadic<Index>: $dynamic_sizes,
+ OptionalAttr<DenseI64ArrayAttr>: $static_sizes)>,
+ Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
+
+ let summary = "TODO";
+ let description = [{
+ TODO
+ }];
+
+ let assemblyFormat = [{
+ $base `globalSize` custom<DynamicIndexList>($dynamic_sizes, $static_sizes) attr-dict `:` qualified(type($base)) `to` type(results)
+ }];
+}
+
#endif // AMDGPU
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 653f9f64d24f4..818fd1afa2dc5 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -697,3 +697,11 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32,
func.return
}
+// CHECK-LABEL: func @make_dma_descriptor
+// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
+func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>) {
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]] globalSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ amdgpu.make_dma_descriptor %base globalSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ func.return
+}
+
>From f6f67e39b85c97c39445fa436462c2da916dec40 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 14:10:02 -0500
Subject: [PATCH 05/13] Add globalStride
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 11 ++++++++---
mlir/test/Dialect/AMDGPU/ops.mlir | 4 ++--
2 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index e2fd78dab7ebf..b08039064adff 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1256,11 +1256,13 @@ def AMDGPU_MakeDmaBaseOp :
}
def AMDGPU_MakeDmaDescriptorOp :
- AMDGPU_Op<"make_dma_descriptor", [Pure]>,
+ AMDGPU_Op<"make_dma_descriptor", [Pure, AttrSizedOperandSegments]>,
Arguments<(ins
AMDGPU_TDMBaseType: $base,
Variadic<Index>: $dynamic_sizes,
- OptionalAttr<DenseI64ArrayAttr>: $static_sizes)>,
+ OptionalAttr<DenseI64ArrayAttr>: $static_sizes,
+ Variadic<Index>: $dynamic_strides,
+ OptionalAttr<DenseI64ArrayAttr>: $static_strides)>,
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
let summary = "TODO";
@@ -1269,7 +1271,10 @@ def AMDGPU_MakeDmaDescriptorOp :
}];
let assemblyFormat = [{
- $base `globalSize` custom<DynamicIndexList>($dynamic_sizes, $static_sizes) attr-dict `:` qualified(type($base)) `to` type(results)
+ $base
+ `globalSize` custom<DynamicIndexList>($dynamic_sizes, $static_sizes)
+ `globalStride` custom<DynamicIndexList>($dynamic_strides, $static_strides)
+ attr-dict `:` qualified(type($base)) `to` type(results)
}];
}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 818fd1afa2dc5..a36f59718f175 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -700,8 +700,8 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32,
// CHECK-LABEL: func @make_dma_descriptor
// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>) {
- // CHECK: amdgpu.make_dma_descriptor %[[BASE]] globalSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
- amdgpu.make_dma_descriptor %base globalSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]] globalSize [0] globalStride [1] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
func.return
}
>From 1e2668c8c4dfcf5588c49bfeae1a65be2ae15a98 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 14:25:24 -0500
Subject: [PATCH 06/13] Add verifier for innermost dimension
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 14 ++++++++------
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 11 +++++++++++
mlir/test/Dialect/AMDGPU/invalid.mlir | 10 ++++++++++
3 files changed, 29 insertions(+), 6 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index b08039064adff..e0a356533144d 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1259,10 +1259,10 @@ def AMDGPU_MakeDmaDescriptorOp :
AMDGPU_Op<"make_dma_descriptor", [Pure, AttrSizedOperandSegments]>,
Arguments<(ins
AMDGPU_TDMBaseType: $base,
- Variadic<Index>: $dynamic_sizes,
- OptionalAttr<DenseI64ArrayAttr>: $static_sizes,
- Variadic<Index>: $dynamic_strides,
- OptionalAttr<DenseI64ArrayAttr>: $static_strides)>,
+ Variadic<Index>: $global_dynamic_sizes,
+ OptionalAttr<DenseI64ArrayAttr>: $global_static_sizes,
+ Variadic<Index>: $global_dynamic_strides,
+ OptionalAttr<DenseI64ArrayAttr>: $global_static_strides)>,
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
let summary = "TODO";
@@ -1272,10 +1272,12 @@ def AMDGPU_MakeDmaDescriptorOp :
let assemblyFormat = [{
$base
- `globalSize` custom<DynamicIndexList>($dynamic_sizes, $static_sizes)
- `globalStride` custom<DynamicIndexList>($dynamic_strides, $static_strides)
+ `globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
+ `globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
attr-dict `:` qualified(type($base)) `to` type(results)
}];
+
+ let hasVerifier = 1;
}
#endif // AMDGPU
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index cdc10c60a42ae..4ade1164317af 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -705,6 +705,17 @@ LogicalResult TransposeLoadOp::verify() {
return success();
}
+//===----------------------------------------------------------------------===//
+// MakeDmaDescriptorOp
+//===----------------------------------------------------------------------===//
+
+/// Verifies that the global strides end in a unit stride.
+///
+/// `global_static_strides` is declared as an OptionalAttr, so the accessor
+/// may return std::nullopt, and the list itself may be empty. Both cases are
+/// rejected up front so that back() is never called on a missing or empty
+/// list.
+LogicalResult MakeDmaDescriptorOp::verify() {
+  std::optional<ArrayRef<int64_t>> globalStrides = getGlobalStaticStrides();
+  if (!globalStrides || globalStrides->empty())
+    return emitOpError("strides must not be empty.");
+
+  if (globalStrides->back() != 1)
+    return emitOpError("strides for the innermost dimension must be 1.");
+
+  return success();
+}
+
//===----------------------------------------------------------------------===//
// ScaledMFMAOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index 61fdf29a78cbd..f820060d2c718 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -354,3 +354,13 @@ func.func @scaled_mfma_invalid_k(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32x
%0 = amdgpu.scaled_mfma 32x32x32 (%arg0[0] * %arg1) * (%arg0[1] * %arg1) + %arg2 : vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<4xf8E8M0FNU>, vector<32xf4E2M1FN>, vector<16xf32>
func.return %0 : vector<16xf32>
}
+
+// -----
+
+// CHECK-LABEL: func @make_dma_descriptor
+// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
+func.func @make_dma_descriptor_invalid_strides(%base: !amdgpu.tdm_base<i32>) {
+ // expected-error @+1 {{'amdgpu.make_dma_descriptor' op strides for the innermost dimension must be 1.}}
+ amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1, 2] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ func.return
+}
>From f1df3c5b9722cae7000d2c9584345befd7827dc9 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 14:32:44 -0500
Subject: [PATCH 07/13] Add sharedSize
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 5 ++++-
mlir/test/Dialect/AMDGPU/invalid.mlir | 2 +-
mlir/test/Dialect/AMDGPU/ops.mlir | 10 ++++++++--
3 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index e0a356533144d..16ef34d1486cb 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1262,7 +1262,9 @@ def AMDGPU_MakeDmaDescriptorOp :
Variadic<Index>: $global_dynamic_sizes,
OptionalAttr<DenseI64ArrayAttr>: $global_static_sizes,
Variadic<Index>: $global_dynamic_strides,
- OptionalAttr<DenseI64ArrayAttr>: $global_static_strides)>,
+ OptionalAttr<DenseI64ArrayAttr>: $global_static_strides,
+ Variadic<Index>: $shared_dynamic_sizes,
+ OptionalAttr<DenseI64ArrayAttr>: $shared_static_sizes)>,
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
let summary = "TODO";
@@ -1274,6 +1276,7 @@ def AMDGPU_MakeDmaDescriptorOp :
$base
`globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
`globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
+ `sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
attr-dict `:` qualified(type($base)) `to` type(results)
}];
diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index f820060d2c718..e8a0bfe9476a7 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -361,6 +361,6 @@ func.func @scaled_mfma_invalid_k(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32x
// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
func.func @make_dma_descriptor_invalid_strides(%base: !amdgpu.tdm_base<i32>) {
// expected-error @+1 {{'amdgpu.make_dma_descriptor' op strides for the innermost dimension must be 1.}}
- amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1, 2] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1, 2] sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
func.return
}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index a36f59718f175..0db84a187ddf5 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -700,8 +700,14 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32,
// CHECK-LABEL: func @make_dma_descriptor
// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>) {
- // CHECK: amdgpu.make_dma_descriptor %[[BASE]] globalSize [0] globalStride [1] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
- amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]]
+ // CHECK-SAME: globalSize [0]
+ // CHECK-SAME: globalStride [1]
+ // CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ amdgpu.make_dma_descriptor %base
+ globalSize [0]
+ globalStride [1]
+ sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
func.return
}
>From a24a840b4eb2a2d2daefc91d8b32738fc48cb9d4 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 15:11:16 -0500
Subject: [PATCH 08/13] Add optional atomic barrier
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 16 +++++++++++-----
1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 16ef34d1486cb..d73e35ce82806 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1264,7 +1264,10 @@ def AMDGPU_MakeDmaDescriptorOp :
Variadic<Index>: $global_dynamic_strides,
OptionalAttr<DenseI64ArrayAttr>: $global_static_strides,
Variadic<Index>: $shared_dynamic_sizes,
- OptionalAttr<DenseI64ArrayAttr>: $shared_static_sizes)>,
+ OptionalAttr<DenseI64ArrayAttr>: $shared_static_sizes,
+ Optional<AnyMemRef>: $atomic_barrier_address,
+ Variadic<Index>: $atomic_barrier_dynamic_indices,
+ OptionalAttr<DenseI64ArrayAttr>: $atomic_barrier_static_indices)>,
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
let summary = "TODO";
@@ -1274,10 +1277,13 @@ def AMDGPU_MakeDmaDescriptorOp :
let assemblyFormat = [{
$base
- `globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
- `globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
- `sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
- attr-dict `:` qualified(type($base)) `to` type(results)
+ `globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
+ `globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
+ `sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
+ ( `atomicBarrier` `(` $atomic_barrier_address^
+ custom<DynamicIndexList>($atomic_barrier_dynamic_indices, $atomic_barrier_static_indices)
+ `:` type($atomic_barrier_address) `)`)?
+ attr-dict `:` qualified(type($base)) `to` type(results)
}];
let hasVerifier = 1;
>From ccaf771d1fa91476adc4454991621d2e1d31d412 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 15:32:17 -0500
Subject: [PATCH 09/13] Add iterate
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 6 +++-
mlir/test/Dialect/AMDGPU/ops.mlir | 36 ++++++++++++++++---
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index d73e35ce82806..8c04e45a1983e 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1267,7 +1267,10 @@ def AMDGPU_MakeDmaDescriptorOp :
OptionalAttr<DenseI64ArrayAttr>: $shared_static_sizes,
Optional<AnyMemRef>: $atomic_barrier_address,
Variadic<Index>: $atomic_barrier_dynamic_indices,
- OptionalAttr<DenseI64ArrayAttr>: $atomic_barrier_static_indices)>,
+ OptionalAttr<DenseI64ArrayAttr>: $atomic_barrier_static_indices,
+ Optional<Index>: $global_increment,
+ Optional<Index>: $lds_increment,
+ Optional<Index>: $iteration_count)>,
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
let summary = "TODO";
@@ -1283,6 +1286,7 @@ def AMDGPU_MakeDmaDescriptorOp :
( `atomicBarrier` `(` $atomic_barrier_address^
custom<DynamicIndexList>($atomic_barrier_dynamic_indices, $atomic_barrier_static_indices)
`:` type($atomic_barrier_address) `)`)?
+ ( `iterate` $global_increment^ `,` $lds_increment `,` $iteration_count )?
attr-dict `:` qualified(type($base)) `to` type(results)
}];
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 0db84a187ddf5..6df7c300e5bc7 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -698,16 +698,42 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32,
}
// CHECK-LABEL: func @make_dma_descriptor
-// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
-func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>) {
+// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>, %[[BARRIER:.+]]: memref<8xi32>, %[[IDX:.+]]: index)
+func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8xi32>, %idx: index) {
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
- // CHECK-SAME: globalSize [0]
- // CHECK-SAME: globalStride [1]
- // CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
amdgpu.make_dma_descriptor %base
+ // CHECK-SAME: globalSize [0]
globalSize [0]
+ // CHECK-SAME: globalStride [1]
globalStride [1]
+ // CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]]
+ amdgpu.make_dma_descriptor %base
+ // CHECK-SAME: globalSize [0]
+ globalSize [0]
+ // CHECK-SAME: globalStride [1]
+ globalStride [1]
+ // CHECK-SAME: sharedSize [0]
+ sharedSize [0]
+ // CHECK-SAME: atomicBarrier(%[[BARRIER]] [0] : memref<8xi32>)
+ atomicBarrier(%barrier [0] : memref<8xi32>)
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]]
+ amdgpu.make_dma_descriptor %base
+ // CHECK-SAME: globalSize [0]
+ globalSize [0]
+ // CHECK-SAME: globalStride [1]
+ globalStride [1]
+ // CHECK-SAME: sharedSize [0]
+ sharedSize [0]
+ iterate %idx, %idx, %idx
+ // CHECK-SAME: iterate %[[IDX]], %[[IDX]], %[[IDX]]
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+
+
func.return
}
>From 566d2e61a7f372e2308de60e6c9a224fcd309954 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 16:13:51 -0500
Subject: [PATCH 10/13] [mlir][amdgpu] Add make_dma_descriptor.
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 5 ++++
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 22 ++++++++++++++++
mlir/test/Dialect/AMDGPU/ops.mlir | 25 +++++++++++++++++++
3 files changed, 52 insertions(+)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 8c04e45a1983e..d33605220c442 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1265,6 +1265,10 @@ def AMDGPU_MakeDmaDescriptorOp :
OptionalAttr<DenseI64ArrayAttr>: $global_static_strides,
Variadic<Index>: $shared_dynamic_sizes,
OptionalAttr<DenseI64ArrayAttr>: $shared_static_sizes,
+ Optional<Index>: $pad,
+ OptionalAttr<IndexAttr>: $pad_const,
+ Optional<Index>: $every,
+ OptionalAttr<IndexAttr>: $every_const,
Optional<AnyMemRef>: $atomic_barrier_address,
Variadic<Index>: $atomic_barrier_dynamic_indices,
OptionalAttr<DenseI64ArrayAttr>: $atomic_barrier_static_indices,
@@ -1283,6 +1287,7 @@ def AMDGPU_MakeDmaDescriptorOp :
`globalSize` custom<DynamicIndexList>($global_dynamic_sizes, $global_static_sizes)
`globalStride` custom<DynamicIndexList>($global_dynamic_strides, $global_static_strides)
`sharedSize` custom<DynamicIndexList>($shared_dynamic_sizes, $shared_static_sizes)
+ ( `padShared` `(` custom<DynamicIndex>($pad, $pad_const)^ `every` custom<DynamicIndex>($every, $every_const) `)` )?
( `atomicBarrier` `(` $atomic_barrier_address^
custom<DynamicIndexList>($atomic_barrier_dynamic_indices, $atomic_barrier_static_indices)
`:` type($atomic_barrier_address) `)`)?
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 4ade1164317af..b382fec21f20a 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -50,6 +50,28 @@ struct AMDGPUInlinerInterface final : DialectInlinerInterface {
};
} // namespace
+/// Parser for the `custom<DynamicIndex>` assembly directive: a single index
+/// written either as an integer literal or as an SSA operand.
+///
+/// When an integer literal is present, `staticSize` is set to the matching
+/// index attribute and `dynamicSize` is left untouched. Otherwise an operand
+/// is parsed and stored into `dynamicSize`.
+static ParseResult
+parseDynamicIndex(OpAsmParser &parser,
+                  std::optional<OpAsmParser::UnresolvedOperand> &dynamicSize,
+                  IntegerAttr &staticSize) {
+  int64_t staticVal;
+  OptionalParseResult parsedInt = parser.parseOptionalInteger(staticVal);
+  if (parsedInt.has_value()) {
+    // An integer token was seen; a malformed literal must be reported as a
+    // failure instead of building an attribute from an unset value.
+    if (failed(*parsedInt))
+      return failure();
+    staticSize = parser.getBuilder().getIndexAttr(staticVal);
+    return success();
+  }
+
+  // The optional is taken by reference and populated in place so that the
+  // parsed operand is visible to the caller. Calling value() on an empty
+  // by-value copy would fail and discard the result.
+  return parser.parseOperand(dynamicSize.emplace());
+}
+
+/// Printer for the `custom<DynamicIndex>` assembly directive. Emits the
+/// static attribute's value when one is set, and the SSA operand otherwise.
+/// `op` is not used.
+static void printDynamicIndex(OpAsmPrinter &printer, Operation *op,
+                              Value dynamicSize, IntegerAttr staticSize) {
+  // No static attribute: print the dynamic operand.
+  if (!staticSize) {
+    printer << dynamicSize;
+    return;
+  }
+  printer << staticSize.getValue();
+}
+
void AMDGPUDialect::initialize() {
addOperations<
#define GET_OP_LIST
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 6df7c300e5bc7..36a4f1644c28a 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -700,6 +700,7 @@ func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32,
// CHECK-LABEL: func @make_dma_descriptor
// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>, %[[BARRIER:.+]]: memref<8xi32>, %[[IDX:.+]]: index)
func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8xi32>, %idx: index) {
+
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
@@ -709,6 +710,30 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
// CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]]
+ amdgpu.make_dma_descriptor %base
+ // CHECK-SAME: globalSize [0]
+ globalSize [0]
+ // CHECK-SAME: globalStride [1]
+ globalStride [1]
+ // CHECK-SAME: sharedSize [0]
+ sharedSize [0]
+ // CHECK-SAME: padShared(1 every 1)
+ padShared(1 every 1)
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+
+ // Dynamic `pad`/`every` operands must round-trip as SSA values, so the
+ // check matches the captured %[[IDX]] rather than integer literals.
+ // CHECK: amdgpu.make_dma_descriptor %[[BASE]]
+ amdgpu.make_dma_descriptor %base
+ // CHECK-SAME: globalSize [0]
+ globalSize [0]
+ // CHECK-SAME: globalStride [1]
+ globalStride [1]
+ // CHECK-SAME: sharedSize [0]
+ sharedSize [0]
+ // CHECK-SAME: padShared(%[[IDX]] every %[[IDX]])
+ padShared(%idx every %idx)
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
>From 2be4ccccbcd1971da13173da3087ba8b8c56208e Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 16:21:48 -0500
Subject: [PATCH 11/13] Fix indentation
---
mlir/test/Dialect/AMDGPU/ops.mlir | 46 +++++++++++++++----------------
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 36a4f1644c28a..0bc13e4256244 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -704,59 +704,59 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
- globalSize [0]
+ globalSize [0]
// CHECK-SAME: globalStride [1]
- globalStride [1]
+ globalStride [1]
// CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
- sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
- globalSize [0]
+ globalSize [0]
// CHECK-SAME: globalStride [1]
- globalStride [1]
+ globalStride [1]
// CHECK-SAME: sharedSize [0]
- sharedSize [0]
+ sharedSize [0]
// CHECK-SAME: padShared(1 every 1)
- padShared(1 every 1)
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ padShared(1 every 1)
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
- globalSize [0]
+ globalSize [0]
// CHECK-SAME: globalStride [1]
- globalStride [1]
+ globalStride [1]
// CHECK-SAME: sharedSize [0]
- sharedSize [0]
+ sharedSize [0]
// CHECK-SAME: padShared(1 every 1)
- padShared(%idx every %idx)
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ padShared(%idx every %idx)
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
- globalSize [0]
+ globalSize [0]
// CHECK-SAME: globalStride [1]
- globalStride [1]
+ globalStride [1]
// CHECK-SAME: sharedSize [0]
- sharedSize [0]
+ sharedSize [0]
// CHECK-SAME: atomicBarrier(%[[BARRIER]] [0] : memref<8xi32>)
- atomicBarrier(%barrier [0] : memref<8xi32>)
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ atomicBarrier(%barrier [0] : memref<8xi32>)
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
// CHECK-SAME: globalSize [0]
- globalSize [0]
+ globalSize [0]
// CHECK-SAME: globalStride [1]
- globalStride [1]
+ globalStride [1]
// CHECK-SAME: sharedSize [0]
- sharedSize [0]
- iterate %idx, %idx, %idx
+ sharedSize [0]
// CHECK-SAME: iterate %[[IDX]], %[[IDX]], %[[IDX]]
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ iterate %idx, %idx, %idx
+ : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
func.return
>From b3ba450d336c451b33685f67ddcd42e4a500d80c Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 17:10:42 -0500
Subject: [PATCH 12/13] Review
---
mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 24 +++++++++++++++----
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 11 +++++----
mlir/test/Dialect/AMDGPU/invalid.mlir | 2 +-
mlir/test/Dialect/AMDGPU/ops.mlir | 20 ++++++++--------
4 files changed, 37 insertions(+), 20 deletions(-)
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index d33605220c442..981698a8d25e6 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1251,7 +1251,7 @@ def AMDGPU_MakeDmaBaseOp :
}];
let assemblyFormat = [{
- $src `[` $srcIndices `]` `,` $dst `[` $dstIndices `]` attr-dict `:` type($src) `,` type($dst) `to` type(results)
+ $src `[` $srcIndices `]` `,` $dst `[` $dstIndices `]` attr-dict `:` type($src) `,` type($dst) `->` type(results)
}];
}
@@ -1277,9 +1277,25 @@ def AMDGPU_MakeDmaDescriptorOp :
Optional<Index>: $iteration_count)>,
Results<(outs AMDGPU_TDMDescriptorType: $desc)> {
- let summary = "TODO";
+ let summary = "Make all descriptor groups needed by TensorLoadToLDS/TensorStoreFromLDS.";
let description = [{
- TODO
+ Make all descriptor groups needed by tensor memory operations.
+
+ The $base operand corresponds to the pair of base addresses: one must be an address in LDS,
+ while the other must be a global memory location.
+
+ $global_{static/dynamic}_sizes determine the size of the tensor.
+ $global_{static/dynamic}_strides determine the strides of the tensor.
+ $shared_{static/dynamic}_sizes determine the size of the tile.
+
+ Padding can be applied to the LDS address when copying from memory to LDS,
+ but not when copying from LDS to memory.
+ The values in the padded target addresses remain the same as before the operation was applied.
+
+ 2D and 3D tensors may be iterated over by setting $global_increment, $lds_increment, and $iteration_count.
+ $global_increment determines how much to increment the starting global memory address per iteration in units of the $base's element type.
+ $lds_increment determines how much to increment the starting LDS address per iteration in units of the $base's element type.
+ $iteration_count determines how many times to iterate.
}];
let assemblyFormat = [{
@@ -1292,7 +1308,7 @@ def AMDGPU_MakeDmaDescriptorOp :
custom<DynamicIndexList>($atomic_barrier_dynamic_indices, $atomic_barrier_static_indices)
`:` type($atomic_barrier_address) `)`)?
( `iterate` $global_increment^ `,` $lds_increment `,` $iteration_count )?
- attr-dict `:` qualified(type($base)) `to` type(results)
+ attr-dict `:` qualified(type($base)) `->` type(results)
}];
let hasVerifier = 1;
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index b382fec21f20a..6863dc4ad3e7f 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -54,12 +54,13 @@ static ParseResult
parseDynamicIndex(OpAsmParser &parser,
std::optional<OpAsmParser::UnresolvedOperand> dynamicSize,
IntegerAttr &staticSize) {
- int64_t staticVal;
+
+ int64_t staticVal = 0;
if (parser.parseOptionalInteger(staticVal).has_value()) {
staticSize = parser.getBuilder().getIndexAttr(staticVal);
return success();
}
-
+
return parser.parseOperand(dynamicSize.value());
}
@@ -67,9 +68,9 @@ static void printDynamicIndex(OpAsmPrinter &printer, Operation *op,
Value dynamicSize, IntegerAttr staticSize) {
if (staticSize) {
printer << staticSize.getValue();
- } else {
- printer << dynamicSize;
- }
+ return;
+ }
+ printer << dynamicSize;
}
void AMDGPUDialect::initialize() {
diff --git a/mlir/test/Dialect/AMDGPU/invalid.mlir b/mlir/test/Dialect/AMDGPU/invalid.mlir
index e8a0bfe9476a7..a72193d532ab9 100644
--- a/mlir/test/Dialect/AMDGPU/invalid.mlir
+++ b/mlir/test/Dialect/AMDGPU/invalid.mlir
@@ -361,6 +361,6 @@ func.func @scaled_mfma_invalid_k(%arg0 : vector<4xf8E8M0FNU>, %arg1 : vector<32x
// CHECK-SAME: (%[[BASE:.+]]: !amdgpu.tdm_base<i32>)
func.func @make_dma_descriptor_invalid_strides(%base: !amdgpu.tdm_base<i32>) {
// expected-error@+1 {{'amdgpu.make_dma_descriptor' op strides for the innermost dimension must be 1.}}
- amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1, 2] sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ amdgpu.make_dma_descriptor %base globalSize [0] globalStride [1, 2] sharedSize [0] : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
func.return
}
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 0bc13e4256244..2984bedac7bf5 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -689,11 +689,11 @@ func.func @memory_counter_wait() {
// CHECK-LABEL: func @make_dma_base
// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32>, %[[SMEM:.+]]: memref<8xi32, #gpu.address_space<workgroup>>)
func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32, #gpu.address_space<workgroup>>) {
- // CHECK: amdgpu.make_dma_base %[[MEM]][%[[IDX]]], %[[SMEM]][%[[IDX]]] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> to !amdgpu.tdm_base<i32>
- amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> to !amdgpu.tdm_base<i32>
+ // CHECK: amdgpu.make_dma_base %[[MEM]][%[[IDX]]], %[[SMEM]][%[[IDX]]] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
+ amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
- // CHECK: amdgpu.make_dma_base %[[SMEM]][%[[IDX]]], %[[MEM]][%[[IDX]]] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> to !amdgpu.tdm_base<i32>
- amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> to !amdgpu.tdm_base<i32>
+ // CHECK: amdgpu.make_dma_base %[[SMEM]][%[[IDX]]], %[[MEM]][%[[IDX]]] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> -> !amdgpu.tdm_base<i32>
+ amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> -> !amdgpu.tdm_base<i32>
func.return
}
@@ -707,8 +707,8 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
globalSize [0]
// CHECK-SAME: globalStride [1]
globalStride [1]
- // CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
- sharedSize [0] : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ // CHECK-SAME: sharedSize [0] : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
+ sharedSize [0] : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
@@ -720,7 +720,7 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
sharedSize [0]
// CHECK-SAME: padShared(1 every 1)
padShared(1 every 1)
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
@@ -732,7 +732,7 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
sharedSize [0]
// CHECK-SAME: padShared(1 every 1)
padShared(%idx every %idx)
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
@@ -744,7 +744,7 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
sharedSize [0]
// CHECK-SAME: atomicBarrier(%[[BARRIER]] [0] : memref<8xi32>)
atomicBarrier(%barrier [0] : memref<8xi32>)
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
// CHECK: amdgpu.make_dma_descriptor %[[BASE]]
amdgpu.make_dma_descriptor %base
@@ -756,7 +756,7 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
sharedSize [0]
// CHECK-SAME: iterate %[[IDX]], %[[IDX]], %[[IDX]]
iterate %idx, %idx, %idx
- : !amdgpu.tdm_base<i32> to !amdgpu.tdm_descriptor
+ : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
func.return
>From d34c423efcaa0a655ed599e04d9ec56177270dcb Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 17:53:03 -0500
Subject: [PATCH 13/13] Fix parser
---
mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp | 16 +++++++++++-----
mlir/test/Dialect/AMDGPU/ops.mlir | 2 +-
2 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index 6863dc4ad3e7f..f37ba43fcaa39 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -52,16 +52,22 @@ struct AMDGPUInlinerInterface final : DialectInlinerInterface {
+/// Custom-directive parser for one index that is written either as an integer
+/// literal or as an SSA operand. On success exactly one of the outputs is set:
+/// `staticSize` for a literal, `dynamicSize` for an operand.
static ParseResult
parseDynamicIndex(OpAsmParser &parser,
- std::optional<OpAsmParser::UnresolvedOperand> dynamicSize,
+ std::optional<OpAsmParser::UnresolvedOperand> &dynamicSize,
IntegerAttr &staticSize) {
- int64_t staticVal = 0;
- if (parser.parseOptionalInteger(staticVal).has_value()) {
+ // Static form: has_value() only reports that an integer literal was present;
+ // the wrapped ParseResult reports whether it was parsed cleanly.
+ int64_t staticVal;
+ OptionalParseResult parseResult = parser.parseOptionalInteger(staticVal);
+ if (parseResult.has_value()) {
+ if (failed(*parseResult))
+ return failure();
staticSize = parser.getBuilder().getIndexAttr(staticVal);
return success();
}
-
- return parser.parseOperand(dynamicSize.value());
+
+ // Dynamic form: ParseResult is true in a boolean context on *failure*, so
+ // check it with failed() instead of a bare `if`.
+ OpAsmParser::UnresolvedOperand operand;
+ if (failed(parser.parseOperand(operand)))
+ return failure();
+ dynamicSize = operand;
+ return success();
}
static void printDynamicIndex(OpAsmPrinter &printer, Operation *op,
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 2984bedac7bf5..923b30ce95363 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -730,7 +730,7 @@ func.func @make_dma_descriptor(%base: !amdgpu.tdm_base<i32>, %barrier: memref<8x
globalStride [1]
// CHECK-SAME: sharedSize [0]
sharedSize [0]
- // CHECK-SAME: padShared(1 every 1)
+ // CHECK-SAME: padShared(%[[IDX]] every %[[IDX]])
padShared(%idx every %idx)
: !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
More information about the Mlir-commits
mailing list