[Mlir-commits] [mlir] [ROCDL] Added matrix load-transpose ops for gfx1250+ (PR #165564)
Ravil Dorozhinskii
llvmlistbot at llvm.org
Fri Oct 31 17:51:13 PDT 2025
================
@@ -650,6 +649,76 @@ def ROCDL_ds_read_tr8_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr8.b64">;
def ROCDL_ds_read_tr6_b96 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr6.b96">;
def ROCDL_ds_read_tr16_b64 : ROCDL_LDS_Read_Tr_IntrOp<"ds.read.tr16.b64">;
+
+
+//===---------------------------------------------------------------------===//
+// Global/DS load-transpose intrinsics (available in GFX1250+)
+
+// Pairs a type with its bit width so both can be read from a single record.
+//   t - the wrapped type, exposed as `type`.
+//   w - the bit width of `t`, exposed as `bitwidth`.
+class WrapperType<Type t, int w> {
+ Type type = t;
+ int bitwidth = w;
+}
+// Wrapper for an `I` type; the bit width is taken from the type's own `bitwidth` field.
+class IType<I t> : WrapperType<t, t.bitwidth> {}
+// Wrapper for an `F` type; the bit width is taken from the type's own `bitwidth` field.
+class FType<F t> : WrapperType<t, t.bitwidth> {}
+// Wrapper for BF16, with the bit width (16) spelled out explicitly.
+def BF16Type : WrapperType<BF16, 16> {}
+
+
+// Describes the kind of address a load-transpose op reads from.
+//   n - short name, exposed as `name` (used when building op mnemonics).
+//   s - numeric address space, exposed as `space`; it also parameterises the
+//       pointer type exposed as `type`.
+class AddrKind<string n, int s> {
+ string name = n;
+ int space = s;
+ LLVM_PointerInAddressSpace type = LLVM_PointerInAddressSpace<s>;
+}
+// "global" addresses: address space 1.
+def GlobalAddrKind : AddrKind<"global", 1>;
+// "ds" addresses: address space 3.
+def DSAddrKind : AddrKind<"ds", 3>;
+
+// Derives everything a load-transpose op needs from its parameters.
+//   addKind     - address kind of the source (name, address space, pointer type).
+//   inElemBits  - bit width of the elements being loaded.
+//   outElemBits - total bit width of the result vector.
+//   outElemType - wrapper around the element type of the result vector.
+// Exposes the operand pointer type (`inType`), the result vector type
+// (`outType`), the widths as strings (`inBits`, `outBits`) and the op
+// `mnemonic`.
+class ROCDL_TrLoadOpMeta<AddrKind addKind, int inElemBits, int outElemBits, WrapperType outElemType> {
+ string inBits = !cast<string>(inElemBits);
+ string outBits = !cast<string>(outElemBits);
+ LLVM_PointerInAddressSpace inType = addKind.type;
+ // Number of result elements = total result bits / bits per element.
+ int outNumElem = !div(outElemBits, outElemType.bitwidth);
+ ROCDL_ConcreteVector outType = ROCDL_ConcreteVector<outElemType.type, outNumElem>;
+ // Element-width token spliced into the mnemonic. It is dropped for 8- and
+ // 16-bit elements in address space 1 and kept in every other case. The two
+ // width tests have to be OR-ed: nesting them in successive !if's ANDs them,
+ // and an element width can never be both 8 and 16.
+ string inBitsEnc = !if(!and(!eq(addKind.space, 1),
+ !or(!eq(inElemBits, 8), !eq(inElemBits, 16))),
+ "", inBits);
+ string mnemonic = addKind.name # ".load.tr" # inBitsEnc # ".b" # outBits;
+}
+
+// Load-transpose op built from a ROCDL_TrLoadOpMeta record.
+//   meta - supplies the mnemonic, the pointer operand type (`inType`), the
+//          result vector type (`outType`) and the bit widths quoted in the
+//          description.
+// NOTE(review): the positional ROCDL_IntrOp arguments and `baseArgs` are
+// defined outside this excerpt; they are passed through unchanged.
+class ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta meta> :
+ ROCDL_IntrOp<meta.mnemonic, [1], [], [], 1, 0, 1> {
+
+ // Single pointer operand, marked as a memory read, followed by the base
+ // arguments inherited from the intrinsic op class.
+ dag args = (ins Arg<meta.inType, "", [MemRead]>:$ptr);
+ let arguments = !con(args, baseArgs);
+ let results = (outs meta.outType:$res);
+ let summary = "Loads and transposes a matrix from global memory or ds to registers (available in gfx1250+).";
+ // The wording covers both address kinds this class is instantiated with,
+ // matching the summary above.
+ let description = [{
+ Load a matrix of }] # meta.inBits # [{-bit data from global memory or ds
+ (as determined by the address space of the pointer operand),
+ transpose data between row-major and column-major order,
+ and store the result into a }] # meta.outBits # [{-bit vector register.
+
+ Available in gfx1250+.
+ }];
+ let assemblyFormat = "$ptr attr-dict `:` type($ptr) `->` type($res)";
+ // The only operand whose memory is accessed is the pointer.
+ let extraClassDefinition = [{
+ ::llvm::SmallVector<::mlir::Value> $cppClass::getAccessedOperands() {
+ return {getPtr()};
+ }
+ }];
+}
+
+// Load-transpose ops for the global address kind. The ROCDL_TrLoadOpMeta
+// arguments are <address kind, input element bits, result bits, result
+// element type wrapper>.
+def ROCDL_GlobalLoadTr4_2I32 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 4, 64, IType<I32>>>;
+def ROCDL_GlobalLoadTr8_2I32 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 8, 64, IType<I32>>>;
+def ROCDL_GlobalLoadTr6_3I32 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 6, 96, IType<I32>>>;
+// NOTE(review): the def name says "Tr8" but the input element width passed
+// here is 16 - confirm which of the two is intended.
+def ROCDL_GlobalLoadTr8_8I16 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 16, 128, IType<I16>>>;
+// Commented-out variants with F16 / BF16 result element types.
+//def ROCDL_GlobalLoadTr8_8F16 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 8, 128, FType<F16>>>;
+//def ROCDL_GlobalLoadTr8_8BF16 : ROCDL_TrLoadOp<ROCDL_TrLoadOpMeta<GlobalAddrKind, 8, 128, BF16Type>>;
----------------
ravil-mobile wrote:
Sure. Removed constraints from the output type.
https://github.com/llvm/llvm-project/pull/165564
More information about the Mlir-commits
mailing list