[Mlir-commits] [mlir] [mlir][amdgpu] Add scaled_ext_packed{8, 16} operations (PR #159830)

Thu Oct 16 10:53:06 PDT 2025

================
@@ -112,6 +112,72 @@ def AMDGPU_ExtPackedFp8Op :
   }];
 }
 
+def IsValidBlockSize: AttrConstraint<
+    CPred<"::llvm::cast<::mlir::IntegerAttr>($_self).getInt() == 16 || ::llvm::cast<::mlir::IntegerAttr>($_self).getInt() == 32">,
+    "whose value is 16 or 32">;
+
+def AMDGPU_ScaledExtPacked816Op
+    : AMDGPU_Op<"scaled_ext_packed816", [Pure, TypesMatchWith<"scale type is fixed",
+                       "source", "scale",
+                       "ScaledExtPacked816Op::getScaleType($_self.getContext())">]>,
+      Arguments<(
+          ins AnyTypeOf<[VectorOfLengthAndType<[8], [F4E2M1FN,F8E4M3FN,F8E5M2]>,
+                         VectorOfLengthAndType<[16], [F6E2M3FN, F6E3M2FN]>]>:$source,
+          FixedVectorOfLengthAndType<[4], [F8E8M0FNU]>:$scale,
+          ConfinedAttr<I32Attr, [IsValidBlockSize]>:$blockSize,
+          ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<1>]>:$firstScaleLane,
+          ConfinedAttr<I32Attr, [IntMinValue<0>, IntMaxValue<2>]>:$firstScaleByte)>,
+      Results<(
+          outs AnyTypeOf<[FixedVectorOfLengthAndType<[8], [F32]>,
+                          FixedVectorOfLengthAndType<[8], [F16]>,
+                          FixedVectorOfLengthAndType<[8], [BF16]>,
+                          FixedVectorOfLengthAndType<[16], [F32]>,
+                          FixedVectorOfLengthAndType<[16], [F16]>,
+                          FixedVectorOfLengthAndType<[16], [BF16]>]>:$res)> {
+
+  let summary = "Extend a vector of packed floating point values";
+
+  let description = [{
+    The scales applied to the input microfloats are stored in two bytes which
+    come from the `scales` input provided in a *half* of the wave identified
+    by `firstScaleLane`. The pair of bytes used is selected by
+    `firstScaleByte`. The 16 vectors in consecutive lanes starting from
+    `firstScaleLane` (which we'll call the scale vectors) will be used by both
+    halves of the wave (with lane L reading from L % 16'th scale vector), but
+    each half will use a different byte.
+
+    When the block size is 32, `firstScaleByte` can be either 0 or 2,
+    selecting halves of the scale vectors. Lanes 0-15 will read from
+    `firstScaleByte` and lanes 16-31 will read from `firstScaleByte` + 1.
+
+    However, when the block size is 16, `firstScaleByte` can be 0 or 1.
+    Lanes 0-15 read from the `firstScaleByte`th element of the scale vectors,
+    while lanes 16-31 read from `firstScaleByte` + 2.
+
+    Note: the layout for the scales generally mirrors how the WMMA
+    instructions use for matix scales. These selection operands allows
+    one to choose portions of the matrix to convert.
+
+    Available on gfx1250+.
+  }];
+
+  let assemblyFormat = [{
+    attr-dict $source
+    `scale` `(` $scale `)`
+    `blockSize` `(` $blockSize `)`
+    `firstScaleLane` `(` $firstScaleLane`)`
+    `firstScaleByte` `(` $firstScaleByte `)`
+    `:` type($source) `to` type($res)
+  }];
+
+  let extraClassDeclaration = [{
+    static Type getScaleType(MLIRContext *ctx) {
----------------
amd-eochoalo wrote:

I would like the assembly format to only have `type($source) `to` type($res)`. 

Without using 

```tblgen
TypesMatchWith<"scale type is fixed",
                       "source", "scale",
                       "ScaledExtPacked816Op::getScaleType($_self.getContext())">]
// (which requires the definition of this extra class declaration)
```

The assembly format parser generator gives an error stating 

```
error: type of operand #1, named 'scale', is not buildable and a buildable type cannot be inferred
    attr-dict $source
```

I can inline this function like this:

```diff

diff --git a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
index 05525d3a061d..210097138807 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/IR/AMDGPU.td
@@ -119,7 +119,7 @@ def IsValidBlockSize: AttrConstraint<
 def AMDGPU_ScaledExtPacked816Op
     : AMDGPU_Op<"scaled_ext_packed816", [Pure, TypesMatchWith<"scale type is fixed",
                        "source", "scale",
-                       "ScaledExtPacked816Op::getScaleType($_self.getContext())">]>,
+                       "VectorType::get(4, Float8E8M0FNUType::get($_self.getContext()))">]>,
       Arguments<(
           ins AnyTypeOf<[VectorOfLengthAndType<[8], [F4E2M1FN,F8E4M3FN,F8E5M2]>,
                          VectorOfLengthAndType<[16], [F6E2M3FN, F6E3M2FN]>]>:$source,
@@ -170,12 +170,6 @@ def AMDGPU_ScaledExtPacked816Op
     `:` type($source) `to` type($res)
   }];

-  let extraClassDeclaration = [{
-    static Type getScaleType(MLIRContext *ctx) {
-      return VectorType::get(4, Float8E8M0FNUType::get(ctx));
-    }
-  }];
-
 }

 def AMDGPU_ScaledExtPackedOp
```

or if you prefer add the type declaration, or maybe another solution?

https://github.com/llvm/llvm-project/pull/159830