[Mlir-commits] [mlir] [mlir][amdgpu] Add amdgpu.make_dma_descriptor (PR #169407)
Krzysztof Drewniak
llvmlistbot at llvm.org
Wed Nov 26 11:45:09 PST 2025
================
@@ -1192,4 +1227,132 @@ def AMDGPU_ScaledMFMAOp :
}];
let hasCanonicalizer = 1;
}
+
+def AMDGPU_MakeDmaBaseOp :
+ AMDGPU_Op<"make_dma_base", [Pure, AttrSizedOperandSegments]>,
+ Arguments<(ins
+ Arg<AnyMemRef, "buffer to read from">:$src,
+ Variadic<Index>:$src_indices,
+ OptionalAttr<DenseI64ArrayAttr>: $src_indices_const,
+ Arg<AnyMemRef, "buffer to write to">:$dst,
+ Variadic<Index>:$dst_indices,
+ OptionalAttr<DenseI64ArrayAttr>: $dst_indices_const)>,
+ Results<(outs AMDGPU_TDMBaseType: $base)> {
+
+ // TODO:
+ // * Add verifiers such that one of the memrefs is from LDS and the other global.
+ // * Add verifiers to make sure that the number of indices does not exceed the number of dimensions.
+
+ let summary = "Pair of base addresses used when moving tiles between LDS and global memory.";
+ let description = [{
+ This operation creates a pair of addresses that will be used by tensor_load_to_lds
+ and tensor_store_from_lds.
+
+ This operation creates a value corresponding roughly to the descriptor group 0
+ found in TensorLoadToLDSOp and TensorStoreFromLDSOp in the rocdl dialect.
+ For example:
+
+ ```mlir
+ %base = amdgpu.make_dma_base %src[%idx0], %dst[%idx1] : memref<8xi32>, memref<8xi32, #gpu.address_space<workgroup>> -> !amdgpu.tdm_base<i32>
+ %descriptor = amdgpu.make_dma_descriptor %base globalSize [2, 2] globalStride [2, 1] sharedSize [2, 2] : !amdgpu.tdm_base<i32> -> !amdgpu.tdm_descriptor
+ amdgpu.tensor_load_to_lds %descriptor : !amdgpu.tdm_descriptor
+ ```
+
+ is lowered, roughly, to
+
+ ```mlir
+ // pseudocode
+ %base_0 = llvm.mlir.undef : !llvm.struct<(ptr, ptr)>
+ %base_1 = llvm.insertvalue %global_addr, %base_0[0] : !llvm.struct<(ptr, ptr)>
+ %base_2 = llvm.insertvalue %lds_addr, %base_1[1] : !llvm.struct<(ptr, ptr)>
+ // type(%base_2) = !llvm.struct<(ptr, ptr)> roughly corresponds to !amdgpu.tdm_base<i32>
+
+ // The base will be used when constructing dgroup0
+ // when lowering amdgpu.make_dma_descriptor
+ %dgroup0_0 = llvm.mlir.undef : !llvm.struct<(....)>
+ %dgroup0_1 = llvm.insertvalue %base_2, %dgroup0_0 : ....
+
+ // When lowering amdgpu.tensor_load_to_lds
+ rocdl.tensor.load.to.lds %dgroup0, %dgroup1, %dgroup2, %dgroup3 cachepolicy 0 : vector<4xi32>, vector<8xi32>
+ ```
+ }];
+
+ // TODO: Define a custom printer, parser to avoid space between $src/$dst and indices.
+ let assemblyFormat = [{
+ $src custom<DynamicIndexList>($src_indices, $src_indices_const) `,`
+ $dst custom<DynamicIndexList>($dst_indices, $dst_indices_const) attr-dict `:` type($src) `,` type($dst) `->` type(results)
+ }];
+}
+
+def AMDGPU_MakeDmaDescriptorOp :
+ AMDGPU_Op<"make_dma_descriptor", [Pure, AttrSizedOperandSegments]>,
+ Arguments<(ins
+ AMDGPU_TDMBaseType: $base,
+ Variadic<Index>: $global_dynamic_sizes,
+ OptionalAttr<DenseI64ArrayAttr>: $global_static_sizes,
+ Variadic<Index>: $global_dynamic_strides,
+ OptionalAttr<DenseI64ArrayAttr>: $global_static_strides,
+ Variadic<Index>: $shared_dynamic_sizes,
+ OptionalAttr<DenseI64ArrayAttr>: $shared_static_sizes,
+ Optional<Index>: $pad,
+ OptionalAttr<IndexAttr>: $pad_const,
+ Optional<Index>: $every,
+ OptionalAttr<IndexAttr>: $every_const,
----------------
krzysz00 wrote:
This can just be an optional index - doesn't need attribute splitting
https://github.com/llvm/llvm-project/pull/169407
More information about the Mlir-commits
mailing list