[Mlir-commits] [mlir] [mlir][amdgpu] Add make_dma_base operation (PR #169086)

Wed Nov 26 11:34:07 PST 2025

https://github.com/amd-eochoalo updated https://github.com/llvm/llvm-project/pull/169086

>From a4a1a59d894aae479a1bd5aebe2705431b6588b5 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Fri, 21 Nov 2025 12:56:29 -0500
Subject: [PATCH 1/5] [mlir][amdgpu] Add make_dma_base operation

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 55 +++++++++++++++++++
 .../mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h    |  4 ++
 mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp  |  7 +++
 mlir/test/Dialect/AMDGPU/ops.mlir             | 12 ++++
 4 files changed, 78 insertions(+)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 4820b7a747ac2..04043f47c3539 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -33,6 +33,7 @@ def AMDGPU_Dialect : Dialect {
     "gpu::GPUDialect"
   ];
   let useDefaultAttributePrinterParser = 1;
+  let useDefaultTypePrinterParser = 1;
 }
 
 def AnyIntegerOrFloat : AnyTypeOf<[AnySignlessInteger, AnyFloat], "Integer or Float">;
@@ -79,6 +80,36 @@ def AMDGPU_AddressSpaceAttr : EnumAttr<AMDGPU_Dialect, AMDGPU_AddressSpace,
   let assemblyFormat = "`<` $value `>`";
 }
 
+class AMDGPU_Type<string name, string typeMnemonic, list<Trait> traits = []>
+    : TypeDef<AMDGPU_Dialect, name, traits> {
+  let mnemonic = typeMnemonic;
+}
+
+//===----------------------------------------------------------------------===//
+// AMDGPU Type definitions
+//===----------------------------------------------------------------------===//
+
+def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
+  // TODO:
+  // * Add verifiers such that one of the memrefs is from LDS and the other global.
+  // * Add verifiers to make sure that the type is in the correct direction.
+  // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions.
+
+  let summary = "Pair of base addresses that move data between LDS and global storage.";
+  let description = [{
+    This type is opaque and it is used to represent a struct of two addresses.
+    One address is in LDS while the other is in global memory.
+  }];
+  let parameters = (ins "Type":$elementType);
+  let builders = [
+    TypeBuilderWithInferredContext<(ins "Type":$elementType), [{
+      return $_get(elementType.getContext(), elementType);
+    }]>
+  ];
+  let assemblyFormat = "`<` $elementType `>`";
+
+}
+
 //===----------------------------------------------------------------------===//
 // AMDGPU Op definitions
 //===----------------------------------------------------------------------===//
@@ -1192,4 +1223,28 @@ def AMDGPU_ScaledMFMAOp :
   }];
   let hasCanonicalizer = 1;
 }
+
+def AMDGPU_MakeDmaBaseOp :
+    AMDGPU_Op<"make_dma_base", [AttrSizedOperandSegments]>,
+    Arguments<(ins
+                   Arg<AnyMemRef, "buffer to read from", [MemRead]>:$src,
+                   Variadic<Index>:$srcIndices,
+                   Arg<AnyMemRef, "buffer to write to", [MemWrite]>:$dst,
+                   Variadic<Index>:$dstIndices)>,
+    Results<(outs AMDGPU_TDMBaseType: $base)> {
+
+  let summary = "Pair of based addresses used when moving tiles between LDS and global memory.";
+  let description = [{
+    This operation creates a pair of addresses that will be used by tensor_load_to_lds
+    and tensor_store_from_lds.
+
+    This operation creates a value corresponding roughly to the descriptor group 0
+    found in TensorLoadToLDSOp and TensorStoreFromLDSOp in the rocdl dialect.
+  }];
+
+  let assemblyFormat = [{
+    $src `[` $srcIndices `]` `,` $dst `[` $dstIndices `]` attr-dict `:` type($src) `,` type($dst) `to` type(results)
+  }];
+}
+
 #endif // AMDGPU
diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h
index dcd9f95a7561f..a7680fb5c3191 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h
@@ -25,6 +25,7 @@
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h.inc"
 
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUEnums.h.inc"
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.h.inc"
 
 namespace mlir::amdgpu {
 /// Parser for the `custom<MNKDimensionList>` custom assembly format used by
@@ -52,6 +53,9 @@ inline void printMNKDimensionList(OpAsmPrinter &printer, Operation *,
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.h.inc"
 
+#define GET_TYPEDEF_CLASSES
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.h.inc"
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/AMDGPU/IR/AMDGPU.h.inc"
 
diff --git a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
index d55f3cec47c1f..cdc10c60a42ae 100644
--- a/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
+++ b/mlir/lib/Dialect/AMDGPU/IR/AMDGPUDialect.cpp
@@ -55,6 +55,10 @@ void AMDGPUDialect::initialize() {
 #define GET_OP_LIST
 #include "mlir/Dialect/AMDGPU/IR/AMDGPU.cpp.inc"
       >();
+  addTypes<
+#define GET_TYPEDEF_LIST
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.cpp.inc"
+      >();
   addAttributes<
 #define GET_ATTRDEF_LIST
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.cpp.inc"
@@ -839,5 +843,8 @@ void ScaledMFMAOp::getCanonicalizationPatterns(RewritePatternSet &results,
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUAttributes.cpp.inc"
 
+#define GET_TYPEDEF_CLASSES
+#include "mlir/Dialect/AMDGPU/IR/AMDGPUTypes.cpp.inc"
+
 #define GET_OP_CLASSES
 #include "mlir/Dialect/AMDGPU/IR/AMDGPU.cpp.inc"
diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir
index 09134cb4704bb..653f9f64d24f4 100644
--- a/mlir/test/Dialect/AMDGPU/ops.mlir
+++ b/mlir/test/Dialect/AMDGPU/ops.mlir
@@ -685,3 +685,15 @@ func.func @memory_counter_wait() {
   amdgpu.memory_counter_wait exp(4)
   func.return
 }
+
+// CHECK-LABEL: func @make_dma_base
+// CHECK-SAME: (%[[IDX:.+]]: index, %[[MEM:.+]]: memref<8xi32>, %[[SMEM:.+]]: memref<8xi32, #gpu.address_space<workgroup>>)
+func.func @make_dma_base(%idx: index, %mem: memref<8xi32>, %smem: memref<8xi32, #gpu.address_space<workgroup>>) {
+  // CHECK: amdgpu.make_dma_base %[[MEM]][%[[IDX]]], %[[SMEM]][%[[IDX]]] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> to !amdgpu.tdm_base<i32>
+  amdgpu.make_dma_base %mem[%idx], %smem[%idx] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> to !amdgpu.tdm_base<i32>
+
+  // CHECK: amdgpu.make_dma_base %[[SMEM]][%[[IDX]]], %[[MEM]][%[[IDX]]] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> to !amdgpu.tdm_base<i32>
+  amdgpu.make_dma_base %smem[%idx], %mem[%idx] : memref<8xi32, #gpu.address_space<workgroup>>, memref<8xi32> to !amdgpu.tdm_base<i32>
+  func.return
+}
+

>From d14f3e28cc79774adb744ae6ee6d98684f120fa7 Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 09:18:26 -0500
Subject: [PATCH 2/5] Remove MemRead and MemWrite from operation

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 04043f47c3539..990d377dc9d7b 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1227,9 +1227,9 @@ def AMDGPU_ScaledMFMAOp :
 def AMDGPU_MakeDmaBaseOp :
     AMDGPU_Op<"make_dma_base", [AttrSizedOperandSegments]>,
     Arguments<(ins
-                   Arg<AnyMemRef, "buffer to read from", [MemRead]>:$src,
+                   Arg<AnyMemRef, "buffer to read from">:$src,
                    Variadic<Index>:$srcIndices,
-                   Arg<AnyMemRef, "buffer to write to", [MemWrite]>:$dst,
+                   Arg<AnyMemRef, "buffer to write to">:$dst,
                    Variadic<Index>:$dstIndices)>,
     Results<(outs AMDGPU_TDMBaseType: $base)> {
 

>From d3ca18c937218a8f115e58b0a6d4d5b10bdc187a Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Mon, 24 Nov 2025 09:20:38 -0500
Subject: [PATCH 3/5] Add Pure to make_dma_base

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 990d377dc9d7b..645fc4655025a 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1225,7 +1225,7 @@ def AMDGPU_ScaledMFMAOp :
 }
 
 def AMDGPU_MakeDmaBaseOp :
-    AMDGPU_Op<"make_dma_base", [AttrSizedOperandSegments]>,
+    AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
     Arguments<(ins
                    Arg<AnyMemRef, "buffer to read from">:$src,
                    Variadic<Index>:$srcIndices,

>From a477104c3511da3122cc3a3dc9776d0e8ae0996f Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Wed, 26 Nov 2025 14:32:33 -0500
Subject: [PATCH 4/5] Move TODO

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 645fc4655025a..db5789c266cfc 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -90,11 +90,6 @@ class AMDGPU_Type<string name, string typeMnemonic, list<Trait> traits = []>
 //===----------------------------------------------------------------------===//
 
 def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
-  // TODO:
-  // * Add verifiers such that one of the memrefs is from LDS and the other global.
-  // * Add verifiers to make sure that the type is in the correct direction.
-  // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions.
-
   let summary = "Pair of base addresses that move data between LDS and global storage.";
   let description = [{
     This type is opaque and it is used to represent a struct of two addresses.
@@ -107,7 +102,6 @@ def AMDGPU_TDMBaseType : AMDGPU_Type<"TDMBase", "tdm_base"> {
     }]>
   ];
   let assemblyFormat = "`<` $elementType `>`";
-
 }
 
 //===----------------------------------------------------------------------===//
@@ -1233,6 +1227,11 @@ def AMDGPU_MakeDmaBaseOp :
                    Variadic<Index>:$dstIndices)>,
     Results<(outs AMDGPU_TDMBaseType: $base)> {
 
+  // TODO:
+  // * Add verifiers such that one of the memrefs is from LDS and the other global.
+  // * Add verifiers to make sure that the type is in the correct direction.
+  // * Add verifiers to make sure that the number of indices do not exceed the number of dimensions.
+
   let summary = "Pair of based addresses used when moving tiles between LDS and global memory.";
   let description = [{
     This operation creates a pair of addresses that will be used by tensor_load_to_lds

>From 08cd9fb5eb2165812fc061ee89fcd86ad6d68f2e Mon Sep 17 00:00:00 2001
From: Erick Ochoa <erick.ochoalopez at amd.com>
Date: Wed, 26 Nov 2025 14:33:42 -0500
Subject: [PATCH 5/5] Update description

---
 mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index db5789c266cfc..e07c72b839e7c 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -1237,8 +1237,10 @@ def AMDGPU_MakeDmaBaseOp :
     This operation creates a pair of addresses that will be used by tensor_load_to_lds
     and tensor_store_from_lds.
 
-    This operation creates a value corresponding roughly to the descriptor group 0
+    This operation creates a value corresponding to the tensor descriptor (D#) group 0
     found in TensorLoadToLDSOp and TensorStoreFromLDSOp in the rocdl dialect.
+
+    These tensor DMA operations were introduced in gfx1250.
   }];
 
   let assemblyFormat = [{