[Mlir-commits] [mlir] [mlir][ROCDL] Add fp4 and fp6 conversion intrinsics, fix fp8 immargs (PR #140801)

Tue May 20 14:11:46 PDT 2025

llvmbot wrote:



@llvm/pr-subscribers-mlir-llvm

@llvm/pr-subscribers-backend-amdgpu

Author: Krzysztof Drewniak (krzysz00)

<details>
<summary>Changes</summary>

This PR adds support for the scaled conversion intrinsics for fp4 and fp6 types so that they can be targeted by a future amdgpu dialect op or used directly.

Additionally, this patch refactors the copy-paste-heavy fp8 versions of these scaled conversion intrinsics with tablegen `foreach` loops, and fixes the fact that certain immargs weren't being stored as attributes.

Note that some of the MLIR-level tests for those scaled fp8 intrinsics had incorrect return types, which have been fixed.

(Note that while the operations have a known return type, the IR format still prints that type for clarity.)

---

Patch is 76.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140801.diff


6 Files Affected:

- (modified) mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td (+361-325) 
- (modified) mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp (+10-14) 
- (modified) mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats-ocp.mlir (+10-20) 
- (modified) mlir/test/Conversion/AMDGPUToROCDL/8-bit-floats.mlir (+10-20) 
- (modified) mlir/test/Dialect/LLVMIR/rocdl.mlir (+109-37) 
- (modified) mlir/test/Target/LLVMIR/rocdl.mlir (+106-34) 


``````````diff

diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 6fb9e3aba1f0a..1dadb7d9e8852 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -709,20 +709,23 @@ def ROCDL_PermlaneX16Op : ROCDL_IntrOp<"permlanex16", [], [0],
   }];
 }
 
-def ROCDL_V2I16Type : FixedVectorOfLengthAndType<[2], [I16]>,
-                        BuildableType<"::mlir::VectorType::get("
-                          "{2},$_builder.getI16Type())">;
+class ROCDL_ConcreteVector<Type elem, int length> :
+  FixedVectorOfLengthAndType<[length], [elem]>,
+  BuildableType<
+    "::mlir::VectorType::get({" # length # "} ,"
+      # elem.builderCall # ")">;
+
+def ROCDL_V2I16Type : ROCDL_ConcreteVector<I16, 2>;
+def ROCDL_V2F16Type : ROCDL_ConcreteVector<F16, 2>;
+def ROCDL_V2BF16Type : ROCDL_ConcreteVector<BF16, 2>;
+def ROCDL_V2F32Type : ROCDL_ConcreteVector<F32, 2>;
+def ROCDL_V6I32Type : ROCDL_ConcreteVector<I32, 6>;
+def ROCDL_V8I32Type : ROCDL_ConcreteVector<I32, 8>;
+def ROCDL_V16F32Type : ROCDL_ConcreteVector<F32, 16>;
+def ROCDL_V32F16Type : ROCDL_ConcreteVector<F16, 32>;
+def ROCDL_V32BF16Type : ROCDL_ConcreteVector<BF16, 32>;
+def ROCDL_V32F32Type : ROCDL_ConcreteVector<F32, 32>;
 
-def ROCDL_V2F16Type : FixedVectorOfLengthAndType<[2], [F16]>,
-                        BuildableType<"::mlir::VectorType::get("
-                          "{2},$_builder.getF16Type())">;
-
-def ROCDL_V2BF16Type : FixedVectorOfLengthAndType<[2], [BF16]>,
-                        BuildableType<"::mlir::VectorType::get("
-                          "{2},$_builder.getBF16Type())">;
-
-// TODO: The word and byte selectors are immarg in LLVM
-// update to be attributes in MLIR
 //===---------------------------------------------------------------------===//
 // 16-bit float intrinsics
 //===---------------------------------------------------------------------===//
@@ -738,279 +741,12 @@ def ROCDL_CvtPkRtz:
   }];
 }
 
-def ROCDL_CvtScaleF32PkFp8F16Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.fp8.f16", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2F16Type: $src, F32: $scale, I1:$wordSel)> {
-    let summary = "Scale and convert f16 to packed fp8";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed fp8.
-    Store the result in low/high word of `old` based on $wordSel, preserving the other word.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32PkFp8Bf16Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.fp8.bf16", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2BF16Type: $src, F32: $scale, I1:$wordSel)> {
-    let summary = "Scale and convert packed bf16 to packed fp8";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed fp8.
-    Store the result in low/high word of `old` based on $wordSel, preserving the other word.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
-  }];
-}
-
-
-def ROCDL_CvtScaleF32PkBf8F16Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.bf8.f16", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2F16Type: $src, F32: $scale, I1:$wordSel)> {
-    let summary = "Scale and convert f16 to packed bf8";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed bf8.
-    Store the result in low/high word of `old` based on $wordSel, preserving the other word.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
-  }];
-}
-
-
-def ROCDL_CvtScaleF32PkBf8Bf16Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.bf8.bf16", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2I16Type: $old, ROCDL_V2BF16Type: $src, F32: $scale, I1:$wordSel)> {
-    let summary = "Scale and convert bf16 to packed bf8";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed bf8.
-    Store the result in low/high word of `old` based on $wordSel, preserving the other word.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32SrFp8F16Op :
-    ROCDL_IntrOp<"cvt.scalef32.sr.fp8.f16", [], [], [Pure], 1>,
-    Arguments<(ins I32:$old, F16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
-    let summary = "Scale and convert f16 to packed fp8 using stochastic rounding";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed p8 with stochastic rounding
-    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
-
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32SrBf8F16Op :
-    ROCDL_IntrOp<"cvt.scalef32.sr.bf8.f16", [], [], [Pure], 1>,
-    Arguments<(ins I32:$old, F16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
-    let summary = "Scale and convert f16 to packed bf8 using stochastic rounding";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed bf8 with stochastic rounding
-    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
-
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32SrFp8Bf16Op :
-    ROCDL_IntrOp<"cvt.scalef32.sr.fp8.bf16", [], [], [Pure], 1>,
-    Arguments<(ins I32:$old, BF16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
-    let summary = "Scale and convert packed bf16 to packed fp8 using stochastic rounding";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed fp8 with stochastic rounding
-    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
-
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32SrBf8Bf16Op :
-    ROCDL_IntrOp<"cvt.scalef32.sr.bf8.bf16", [], [], [Pure], 1>,
-    Arguments<(ins I32:$old, BF16:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
-    let summary = "Scale and convert bf16 to packed fp8 using stochastic rounding";
-    let description = [{
-    Scale `src` by the exponent in `scale`, then convert to packed p8 with stochastic rounding
-    using seed data in `seed`. Store into the `byteSel`th byte of `old`, preserving the others.
-
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32PkF16Fp8Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.f16.fp8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
-    let summary = "Convert fp8 to packed f16 and scale";
-    let description = [{ Convert `src` based on $wordSel to packed f16, then scale
-    the packed values by the exponent in `scale`.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32PkF16Bf8Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.f16.bf8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
-    let summary = "convert bf8 to packed f16 and scale";
-    let description = [{ Convert `src` based on $wordSel to packed f16, then scale
-    the packed values by exponent in `scale`.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32PkBf16Fp8Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.bf16.fp8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
-    let summary = "Convert fp8 to packed bf16 and scale";
-    let description = [{ Convert `src` based on $wordSel to packed bf16, then scale
-    the packed values by the exponent in `scale`.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32PkBf16Bf8Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.bf16.bf8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
-    let summary = "Convert bf8 to packed bf16 and scale";
-    let description = [{ Convert `src` based on $wordSel to packed bf16, then scale
-    the packed values by the exponent in `scale`.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF16Fp8Op :
-    ROCDL_IntrOp<"cvt.scalef32.f16.fp8", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2F16Type:$old, I32:$src, F32: $scale, I32:$byteSel, I1:$wordSel)> {
-    let summary = "Scale and convert fp8 to f16";
-    let description = [{ Convert `src` based on $wordSel to f16, then scale the value
-    by the exponent in `scale`. Store the result into the `byteSel`th byte of `old`,
-    preserving the others.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF16Bf8Op :
-    ROCDL_IntrOp<"cvt.scalef32.f16.bf8", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2F16Type:$old, I32:$src, F32: $scale, I32:$byteSel, I1:$wordSel)> {
-    let summary = "Scale and convert fp8 to f16";
-    let description = [{ Convert `src` based on $wordSel to f16, then scale the value
-    by the exponent in `scale`. Store the result into the `byteSel`th byte of `old`,
-    preserving the others.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-//===---------------------------------------------------------------------===//
-// 32-bit float intrinsics
-//===---------------------------------------------------------------------===//
-def ROCDL_CvtScaleF32PkF32Fp8Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.f32.fp8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
-  let summary = "Scale and convert packed fp8 to packed f32";
-  let description = [{
-    Convert `src` based on $wordSel to packed fp32, then scale the packed values by
-    the exponent in `scale`. Store the result in a vector.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
-  }];
-}
-def ROCDL_CvtScaleF32PkF32Bf8Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.f32.bf8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
-  let summary = "Scale and convert packed bf8 to packed f32";
-  let description = [{
-    Convert `src` based on $wordSel to packed fp32, then scale the packed values by
-    the exponent in `scale`. Store the result in a vector.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
-  }];
-}
-//===---------------------------------------------------------------------===//
-// 8-bit float scale intrinsics
-//===---------------------------------------------------------------------===//
-def ROCDL_CvtScaleF32PkFp8F32Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.fp8.f32", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2I16Type:$old, F32:$srcA, F32:$srcB, F32:$scale, I1:$wordSel)> {
-  let summary = "Scale and convert two f32's to packed fp8";
-  let description = [{
-    Scale `srcA` and `srcB` by the exponent in `scale` then convert to packed fp8
-    and store into the low/high word of `old`, preserving the other word.
-  }];
-  let assemblyFormat = [{
-    attr-dict $srcA `,` $srcB `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32PkBf8F32Op :
-    ROCDL_IntrOp<"cvt.scalef32.pk.bf8.f32", [], [], [Pure], 1>,
-    Arguments<(ins ROCDL_V2I16Type:$old, F32:$srcA, F32:$srcB, F32: $scale, I1:$wordSel)> {
-  let summary = "Scale and convert two f32's to packed bf8";
-  let description = [{
-    Scale `srcA` and `srcB` by the exponent in `scale` then convert to packed bf8
-    and store into the low/high word of `old`, preserving the other word.
-  }];
-  let assemblyFormat = [{
-    attr-dict $srcA `,` $srcB `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
-  }];
-}
-
-def ROCDL_CvtScaleF32SrFp8F32Op :
-    ROCDL_IntrOp<"cvt.scalef32.sr.fp8.f32", [], [], [Pure], 1>,
-    Arguments<(ins I32:$old, F32:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
-    let summary = "Scale and convert f32 to fp8 using stochastic rounding";
-    let description = [{
-       Scale `src` by the exponent in `scale` then convert to fp8 with stochastic rounding
-       using seed data in `seed`. store into the `byteSel`th byte of `old`, preserving the others.
-    }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
-
-def ROCDL_CvtScaleF32SrBf8F32Op :
-    ROCDL_IntrOp<"cvt.scalef32.sr.bf8.f32", [], [], [Pure], 1>,
-    Arguments<(ins I32:$old, F32:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
-    let summary = "Scale and convert f32 to bf8 using stochastic rounding";
-    let description = [{
-       Scale `src` by the exponent in `scale` then convert to bf8 with stochastic rounding
-       using seed data in `seed`. store into the `byteSel`th byte of `old`, preserving the others.
-    }];
-  let assemblyFormat = [{
-    attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
-  }];
-}
-
 //===---------------------------------------------------------------------===//
 // 8-bit float intrinsics
 //===---------------------------------------------------------------------===//
 def ROCDL_CvtF32Bf8Op :
-    ROCDL_IntrOp<"cvt.f32.bf8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$srcA, I32:$byteSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.f32.bf8", [Pure], 1, [1], ["byteSel"]>,
+    Arguments<(ins I32:$srcA, I32Attr:$byteSel)> {
   let summary = "Convert bf8 to f32";
   let description = [{
     Convert 8-bit bf8 value from the `byteSel`th bit of `srcA` to fp32.
@@ -1020,23 +756,9 @@ def ROCDL_CvtF32Bf8Op :
   }];
 }
 
-def ROCDL_CvtScaleF32Bf8Op :
-    ROCDL_IntrOp<"cvt.scalef32.f32.bf8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I32:$byteSel)> {
-  let summary = "Scale and convert bf8 to f32";
-  let description = [{
-    Scale `src` by the exponent in `scale` then convert 8-bit bf8 value
-    from the `byteSel`th bit of `src` to fp32.
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
-  }];
-}
-
-
 def ROCDL_CvtF32Fp8Op :
-    ROCDL_IntrOp<"cvt.f32.fp8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$srcA, I32:$byteSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.f32.fp8", [Pure], 1, [1], ["byteSel"]>,
+    Arguments<(ins I32:$srcA, I32Attr:$byteSel)> {
   let summary = "Convert fp8 to f32";
   let description = [{
     Convert 8-bit fp8 value from the `byteSel`th bit of `srcA` to fp32.
@@ -1046,24 +768,9 @@ def ROCDL_CvtF32Fp8Op :
   }];
 }
 
-
-def ROCDL_CvtScaleF32Fp8Op :
-    ROCDL_IntrOp<"cvt.scalef32.f32.fp8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, F32: $scale, I32:$byteSel)> {
-  let summary = "Scale and convert fp8 to f32";
-  let description = [{
-    Scale `src` by the exponent in `scale` then convert 8-bit fp8 value
-    from the `byteSel`th bit of `src` to fp32.
-
-  }];
-  let assemblyFormat = [{
-    attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
-  }];
-}
-
 def ROCDL_CvtPkF32Fp8Op :
-    ROCDL_IntrOp<"cvt.pk.f32.fp8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, I1:$wordSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.pk.f32.fp8", [Pure], 1, [1], ["wordSel"]>,
+    Arguments<(ins I32:$src, I1Attr:$wordSel)> {
   let summary = "Convert packed fp8 to packed f32";
   let description = [{
     Convert `src` based on $wordSel to packed fp32.
@@ -1074,8 +781,8 @@ def ROCDL_CvtPkF32Fp8Op :
 }
 
 def ROCDL_CvtPkF32Bf8Op :
-    ROCDL_IntrOp<"cvt.pk.f32.bf8", [], [], [Pure], 1>,
-    Arguments<(ins I32:$src, I1:$wordSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.pk.f32.bf8", [Pure], 1, [1], ["wordSel"]>,
+    Arguments<(ins I32:$src, I1Attr:$wordSel)> {
   let summary = "Convert packed bf8 to packed f32";
   let description = [{
     Convert `src` based on $wordSel to packed fp32,
@@ -1086,8 +793,8 @@ def ROCDL_CvtPkF32Bf8Op :
 }
 
 def ROCDL_CvtPkBf8F32Op :
-    ROCDL_IntrOp<"cvt.pk.bf8.f32", [], [], [Pure], 1>,
-    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.pk.bf8.f32", [Pure], 1, [3], ["wordSel"]>,
+    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1Attr:$wordSel)> {
   let summary = "Convert two f32's to bf8";
   let description = [{
     Convert `srcA` and `srcB` to bf8 and store into the low/high word of
@@ -1099,8 +806,8 @@ def ROCDL_CvtPkBf8F32Op :
 }
 
 def ROCDL_CvtPkFp8F32Op :
-    ROCDL_IntrOp<"cvt.pk.fp8.f32", [], [], [Pure], 1>,
-    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.pk.fp8.f32", [Pure], 1, [3], ["wordSel"]>,
+    Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1Attr:$wordSel)> {
   let summary = "Convert two f32's to fp8";
   let description = [{
     Convert `srcA` and `srcB` to fp8 and store into the low/high word of
@@ -1112,8 +819,8 @@ def ROCDL_CvtPkFp8F32Op :
 }
 
 def ROCDL_CvtSrBf8F32Op :
-    ROCDL_IntrOp<"cvt.sr.bf8.f32", [], [], [Pure], 1>,
-    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32:$byteSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.sr.bf8.f32", [Pure], 1, [3], ["byteSel"]>,
+    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32Attr:$byteSel)> {
   let summary = "Convert f32 to bf8, stochiastic rounding";
   let description = [{
     Convert `srcA` to bf8, adding the rounding factor from `srcB`,
@@ -1125,8 +832,8 @@ def ROCDL_CvtSrBf8F32Op :
 }
 
 def ROCDL_CvtSrFp8F32Op :
-    ROCDL_IntrOp<"cvt.sr.fp8.f32", [], [], [Pure], 1>,
-    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32:$byteSel)> {
+    ROCDL_ConcreteNonMemIntrOp<"cvt.sr.fp8.f32", [Pure], 1, [3], ["byteSel"]>,
+    Arguments<(ins F32:$srcA, I32:$srcB, I32:$old, I32Attr:$byteSel)> {
   let summary = "Convert f32 to fp8, stochiastic rounding";
   let description = [{
     Convert `srcA` to fp8, adding the rounding factor from `srcB`,
@@ -1137,6 +844,335 @@ def ROCDL_CvtSrFp8F32Op :
   }];
 }
 
+//===---------------------------------------------------------------------===//
+// Scaled float conversion intrinsics
+//
+// These are using some tablegen trickery to avoid repetitive documentation
+//===---------------------------------------------------------------------===//
+
+// Pair used so we can iterate over types..
+class ScaleArgInfo<TypeConstraint argTyVal, string typeName> {
+  TypeConstraint type = argTyVal;
+  string name = !tolower(typeName);
+  string nameForOp = typeName;
+}
+
+//===---------------------------------------------------------------------===//
+// Scaled 32x6-bit float conversion intrinsics
+//===---------------------------------------------------------------------===//
+foreach smallT = [
+   // MLIR f6E2M3FN
+  ScaleArgInfo<ROCDL_V6I32Type, "Fp6">,
+   // MLIR f6E3M2FN
+  ScaleArgInfo<ROCDL_V6I32Type, "Bf6">
+] in {
+  foreach largeT = [
+    ScaleArgInfo<ROCDL_V32F16Type, "F16">,
+    ScaleArgInfo<ROCDL_V32BF16Type, "Bf16">,
+    ScaleArgInfo<ROCDL_V32F32Type, "F32">,
+  ] in {
+    // Note: rounding down f32 values has a special case where
+    // we have to use 2 16xf32 arguments.
+    if !ne(largeT.name, "f32") then {
+      def ROCDL_CvtScaleF32Pk32 # smallT.nameForOp # largeT.nameForOp # Op :
+          ROCDL_ConcreteNonMemIntrOp<"cvt.scalef32.pk32." # smallT.name # "." # largeT.name,
+            [Pure], 1>,
+          Arguments<(ins largeT.type:$src, F32:$scale)> {
+        let results = (outs smallT.type:$res);
+        let summary = "Scale and convert packed "
+          # largeT.name # " to packed " # smallT.name;
+        let description = [{
+          Convert 32 packed }] # largeT.name # [{ values to packed }]
+          # smallT.name # [{, dividing by the exponent part of `scale`
+          before doing so.
+        }];
+        let assemblyFormat = [{
+          attr-dict $src `,` $scale `:` type($res)
+        }];
+      }
+    } // if
+
+    def ROCDL_CvtScaleF32SrPk32 # smallT.nameForOp # largeT.nameForOp # Op :
+        ROCDL_ConcreteNonMemIntrOp<"cvt.scalef32.sr.pk32." # smallT.name # ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/140801