[Mlir-commits] [mlir] [MLIR][ROCDL] Add Scale Convert f8 <-> F32 Support for GFX950 (PR #125564)
Corbin Robeck
llvmlistbot at llvm.org
Mon Feb 3 11:53:13 PST 2025
https://github.com/CRobeck created https://github.com/llvm/llvm-project/pull/125564
Add ROCDL support for the following GFX950 instructions:
CVT_SCALE_PK_FP8_F32
CVT_SCALE_PK_BF8_F32
CVT_SCALE_SR_FP8_F32
CVT_SCALE_SR_BF8_F32
CVT_SCALE_PK_F32_FP8
CVT_SCALE_PK_F32_BF8
CVT_SCALE_F32_FP8
CVT_SCALE_F32_BF8
>From 9cdd119970cb3e82ec3a481191efc7e0fa4a1506 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Wed, 29 Jan 2025 04:06:26 +0000
Subject: [PATCH 1/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 18 ++++++++++++++++++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 3 +++
2 files changed, 21 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 974712c581537a..0ce72e33adb0dd 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -736,6 +736,24 @@ def ROCDL_CvtPkRtz:
}];
}
+//===---------------------------------------------------------------------===//
+// 8-bit float scale intrinsics
+//===---------------------------------------------------------------------===//
+def ROCDL_CvtScaleF32PkFp8F32:
+ ROCDL_IntrOp<"cvt.scalef32.pk.fp8.f32", [], [], [Pure], 1>,
+ Arguments<(ins I32:$old, F32:$srcA, F32:$srcB, F32: $scale, I1:$wordSel)> {
+ let summary = "Scale and convert two f32's to packed bf8";
+ let description = [{
+ Scale `srcA` and `srcB` by the exponent in `scale` then convert to packed bf8
+ and store into the low/high word of `old`, preserving the other word.
+ }];
+ let assemblyFormat = [{
+ attr-dict $srcA `,` $srcB `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
+ }];
+}
+
+
+
//===---------------------------------------------------------------------===//
// 8-bit float intrinsics
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 5186e43398f01b..bd1879e9fa760b 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -756,16 +756,19 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
// CHECK: rocdl.cvt.f32.fp8
// CHECK: rocdl.cvt.pk.bf8.f32
// CHECK: rocdl.cvt.pk.fp8.f32
+// CHECK: rocdl.cvt.scalef32.pk.fp8.f32
// CHECK: rocdl.cvt.sr.bf8.f32
// CHECK: rocdl.cvt.sr.fp8.f32
%c0 = llvm.mlir.constant(0 : i32) : i32
%c2 = llvm.mlir.constant(2 : i32) : i32
%c3 = llvm.mlir.constant(3 : i32) : i32
+ %c4 = llvm.mlir.constant(1.0 : f32) : f32
%false = llvm.mlir.constant(false) : i1
%v1 = rocdl.cvt.f32.bf8 %source[%c0] : f32
%v2 = rocdl.cvt.f32.fp8 %source[%c0] : f32
%source2 = rocdl.cvt.pk.bf8.f32 %v1, %v2 -> %source[%false] : i32
%source3 = rocdl.cvt.pk.fp8.f32 %v1, %v2 -> %source2[%false] : i32
+ %source3_scaled = rocdl.cvt.scalef32.pk.fp8.f32 %v1, %v2, %c4 -> %source2[%false] : i32
%source4 = rocdl.cvt.sr.bf8.f32 %v1, %stoch -> %source3[%c2] : i32
%source5 = rocdl.cvt.sr.fp8.f32 %v2, %stoch -> %source4[%c3] : i32
llvm.return %source5 : i32
>From 03b1a9a0c52b61f8d305410f66dbec77d69ec8b8 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Wed, 29 Jan 2025 04:11:34 +0000
Subject: [PATCH 2/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 17 ++++++++++++++---
mlir/test/Dialect/LLVMIR/rocdl.mlir | 1 +
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 0ce72e33adb0dd..9922fc9d8e800b 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -742,9 +742,9 @@ def ROCDL_CvtPkRtz:
def ROCDL_CvtScaleF32PkFp8F32:
ROCDL_IntrOp<"cvt.scalef32.pk.fp8.f32", [], [], [Pure], 1>,
Arguments<(ins I32:$old, F32:$srcA, F32:$srcB, F32: $scale, I1:$wordSel)> {
- let summary = "Scale and convert two f32's to packed bf8";
+ let summary = "Scale and convert two f32's to packed fp8";
let description = [{
- Scale `srcA` and `srcB` by the exponent in `scale` then convert to packed bf8
+ Scale `srcA` and `srcB` by the exponent in `scale` then convert to packed fp8
and store into the low/high word of `old`, preserving the other word.
}];
let assemblyFormat = [{
@@ -752,7 +752,18 @@ def ROCDL_CvtScaleF32PkFp8F32:
}];
}
-
+def ROCDL_CvtScaleF32PkBf8F32:
+ ROCDL_IntrOp<"cvt.scalef32.pk.bf8.f32", [], [], [Pure], 1>,
+ Arguments<(ins I32:$old, F32:$srcA, F32:$srcB, F32: $scale, I1:$wordSel)> {
+ let summary = "Scale and convert two f32's to packed bf8";
+ let description = [{
+ Scale `srcA` and `srcB` by the exponent in `scale` then convert to packed bf8
+ and store into the low/high word of `old`, preserving the other word.
+ }];
+ let assemblyFormat = [{
+ attr-dict $srcA `,` $srcB `,` $scale `->` $old `[` $wordSel `]` `:` type($res)
+ }];
+}
//===---------------------------------------------------------------------===//
// 8-bit float intrinsics
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index bd1879e9fa760b..22c0de33698f6c 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -770,6 +770,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
%source3 = rocdl.cvt.pk.fp8.f32 %v1, %v2 -> %source2[%false] : i32
%source3_scaled = rocdl.cvt.scalef32.pk.fp8.f32 %v1, %v2, %c4 -> %source2[%false] : i32
%source4 = rocdl.cvt.sr.bf8.f32 %v1, %stoch -> %source3[%c2] : i32
+ %source4_scaled = rocdl.cvt.scalef32.pk.bf8.f32 %v1, %v2, %c4 -> %source2[%false] : i32
%source5 = rocdl.cvt.sr.fp8.f32 %v2, %stoch -> %source4[%c3] : i32
llvm.return %source5 : i32
}
>From 6e343ff828661e07f8a9266e776347a62ffde7fc Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Wed, 29 Jan 2025 04:38:00 +0000
Subject: [PATCH 3/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 14 ++++++++++++++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 1 +
2 files changed, 15 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 9922fc9d8e800b..363575a44babe9 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -765,6 +765,20 @@ def ROCDL_CvtScaleF32PkBf8F32:
}];
}
+def ROCDL_CvtScaleF32SrFp8F32:
+ ROCDL_IntrOp<"cvt.scalef32.sr.fp8.f32", [], [], [Pure], 1>,
+ Arguments<(ins I32:$old, F32:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
+ let summary = "Scale and convert two f32's to fp8 using stochastic rounding";
+ let description = [{
+ Scale `src` by the exponent in `scale` then convert to fp8 with stochastic rounding
+ using seed data in `seed`. store into the `byteSel`th byte of `old`, preserving the others.
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+ }];
+}
+
+
//===---------------------------------------------------------------------===//
// 8-bit float intrinsics
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 22c0de33698f6c..4157a2de8dc251 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -772,6 +772,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
%source4 = rocdl.cvt.sr.bf8.f32 %v1, %stoch -> %source3[%c2] : i32
%source4_scaled = rocdl.cvt.scalef32.pk.bf8.f32 %v1, %v2, %c4 -> %source2[%false] : i32
%source5 = rocdl.cvt.sr.fp8.f32 %v2, %stoch -> %source4[%c3] : i32
+ %source5_scaled = rocdl.cvt.scalef32.sr.fp8.f32 %v2, %stoch, %c4 -> %source4[%c3] : i32
llvm.return %source5 : i32
}
>From 95fd6d6a760489c8ab5c1c651bb172ccb7164b48 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Wed, 29 Jan 2025 04:44:03 +0000
Subject: [PATCH 4/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 17 ++++++++++++++++-
mlir/test/Dialect/LLVMIR/rocdl.mlir | 2 ++
2 files changed, 18 insertions(+), 1 deletion(-)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 363575a44babe9..255b8d940d10a0 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -768,7 +768,7 @@ def ROCDL_CvtScaleF32PkBf8F32:
def ROCDL_CvtScaleF32SrFp8F32:
ROCDL_IntrOp<"cvt.scalef32.sr.fp8.f32", [], [], [Pure], 1>,
Arguments<(ins I32:$old, F32:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
- let summary = "Scale and convert two f32's to fp8 using stochastic rounding";
+ let summary = "Scale and convert f32 to fp8 using stochastic rounding";
let description = [{
Scale `src` by the exponent in `scale` then convert to fp8 with stochastic rounding
using seed data in `seed`. store into the `byteSel`th byte of `old`, preserving the others.
@@ -779,6 +779,21 @@ def ROCDL_CvtScaleF32SrFp8F32:
}
+def ROCDL_CvtScaleF32SrBf8F32:
+ ROCDL_IntrOp<"cvt.scalef32.sr.bf8.f32", [], [], [Pure], 1>,
+ Arguments<(ins I32:$old, F32:$src, I32:$seed, F32: $scale, I32:$byteSel)> {
+ let summary = "Scale and convert f32 to bf8 using stochastic rounding";
+ let description = [{
+ Scale `src` by the exponent in `scale` then convert to bf8 with stochastic rounding
+ using seed data in `seed`. store into the `byteSel`th byte of `old`, preserving the others.
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `,` $seed `,` $scale `->` $old `[` $byteSel `]` `:` type($res)
+ }];
+}
+
+
+
//===---------------------------------------------------------------------===//
// 8-bit float intrinsics
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 4157a2de8dc251..747e401a450507 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -773,6 +773,8 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
%source4_scaled = rocdl.cvt.scalef32.pk.bf8.f32 %v1, %v2, %c4 -> %source2[%false] : i32
%source5 = rocdl.cvt.sr.fp8.f32 %v2, %stoch -> %source4[%c3] : i32
%source5_scaled = rocdl.cvt.scalef32.sr.fp8.f32 %v2, %stoch, %c4 -> %source4[%c3] : i32
+ %source6 = rocdl.cvt.sr.bf8.f32 %v1, %stoch -> %source3[%c3] : i32
+ %source6_scaled = rocdl.cvt.scalef32.sr.bf8.f32 %v2, %stoch, %c4 -> %source3[%c3] : i32
llvm.return %source5 : i32
}
>From adeb57fbb1c60723f18bb3f5827fa4bb3dcb90b5 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Mon, 3 Feb 2025 17:09:33 +0000
Subject: [PATCH 5/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 14 ++++++++++++++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 1 +
2 files changed, 15 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index 255b8d940d10a0..db67e2bd6ffa75 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -736,6 +736,20 @@ def ROCDL_CvtPkRtz:
}];
}
+def ROCDL_CvtScalePkF32Fp8 :
+ ROCDL_IntrOp<"cvt.scalef32.pk.f32.fp8", [], [], [Pure], 1>,
+ Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
+ let summary = "Scale and convert packed fp8 to packed f32";
+ let description = [{
+ Scale the packed fp8 values held in the low/high word of `src` (selected by
+ `wordSel`) by the exponent in `scale`, then convert them to packed fp32.
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
+ }];
+}
+
+
//===---------------------------------------------------------------------===//
// 8-bit float scale intrinsics
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 747e401a450507..cc00d53dee9ef2 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -775,6 +775,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
%source5_scaled = rocdl.cvt.scalef32.sr.fp8.f32 %v2, %stoch, %c4 -> %source4[%c3] : i32
%source6 = rocdl.cvt.sr.bf8.f32 %v1, %stoch -> %source3[%c3] : i32
%source6_scaled = rocdl.cvt.scalef32.sr.bf8.f32 %v2, %stoch, %c4 -> %source3[%c3] : i32
+ %source7_scaled = rocdl.cvt.scalef32.pk.f32.fp8 %source[%false], %c4 : f32
llvm.return %source5 : i32
}
>From 9f4f056df51a9ac8574d47805b94a2d6af3385e4 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Mon, 3 Feb 2025 17:53:28 +0000
Subject: [PATCH 6/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 17 +++++++++++++++++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 1 +
2 files changed, 18 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index db67e2bd6ffa75..b2a73cd6066181 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -736,6 +736,9 @@ def ROCDL_CvtPkRtz:
}];
}
+//===---------------------------------------------------------------------===//
+// 32-bit float intrinsics
+//===---------------------------------------------------------------------===//
def ROCDL_CvtScalePkF32Fp8 :
ROCDL_IntrOp<"cvt.scalef32.pk.f32.fp8", [], [], [Pure], 1>,
Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
@@ -748,6 +751,20 @@ def ROCDL_CvtScalePkF32Fp8 :
attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
}];
}
+def ROCDL_CvtScalePkF32Bf8 :
+ ROCDL_IntrOp<"cvt.scalef32.pk.f32.bf8", [], [], [Pure], 1>,
+ Arguments<(ins I32:$src, F32: $scale, I1:$wordSel)> {
+ let summary = "Scale and convert packed bf8 to packed f32";
+ let description = [{
+ Scale the packed bf8 values held in the low/high word of `src` (selected by
+ `wordSel`) by the exponent in `scale`, then convert them to packed fp32.
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `[` $wordSel `]` `,` $scale `:` type($res)
+ }];
+}
+
+
//===---------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index cc00d53dee9ef2..962c633a99bb4e 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -776,6 +776,7 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
%source6 = rocdl.cvt.sr.bf8.f32 %v1, %stoch -> %source3[%c3] : i32
%source6_scaled = rocdl.cvt.scalef32.sr.bf8.f32 %v2, %stoch, %c4 -> %source3[%c3] : i32
%source7_scaled = rocdl.cvt.scalef32.pk.f32.fp8 %source[%false], %c4 : f32
+ %source8_scaled = rocdl.cvt.scalef32.pk.f32.bf8 %source[%false], %c4 : f32
llvm.return %source5 : i32
}
>From e2a01f74bafaa1c6d2bbfe70835b7cfaaf0b4d56 Mon Sep 17 00:00:00 2001
From: Corbin Robeck <corbin.robeck at amd.com>
Date: Mon, 3 Feb 2025 18:33:32 +0000
Subject: [PATCH 7/7] update ops
---
mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td | 30 ++++++++++++++++++++
mlir/test/Dialect/LLVMIR/rocdl.mlir | 9 ++++++
2 files changed, 39 insertions(+)
diff --git a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
index b2a73cd6066181..a5fbc476317218 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/ROCDLOps.td
@@ -840,6 +840,20 @@ def ROCDL_CvtF32Bf8Op :
}];
}
+def ROCDL_CvtScaleF32Bf8Op :
+ ROCDL_IntrOp<"cvt.scalef32.f32.bf8", [], [], [Pure], 1>,
+ Arguments<(ins I32:$src, F32: $scale, I32:$byteSel)> {
+ let summary = "Scale and convert bf8 to f32";
+ let description = [{
+ Scale the 8-bit bf8 value in the `byteSel`th byte of `src` by the exponent
+ in `scale`, then convert it to fp32.
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
+ }];
+}
+
+
def ROCDL_CvtF32Fp8Op :
ROCDL_IntrOp<"cvt.f32.fp8", [], [], [Pure], 1>,
Arguments<(ins I32:$srcA, I32:$byteSel)> {
@@ -852,6 +866,22 @@ def ROCDL_CvtF32Fp8Op :
}];
}
+
+def ROCDL_CvtScaleF32Fp8Op :
+ ROCDL_IntrOp<"cvt.scalef32.f32.fp8", [], [], [Pure], 1>,
+ Arguments<(ins I32:$src, F32: $scale, I32:$byteSel)> {
+ let summary = "Scale and convert fp8 to f32";
+ let description = [{
+ Scale the 8-bit fp8 value in the `byteSel`th byte of `src` by the exponent
+ in `scale`, then convert it to fp32.
+
+ }];
+ let assemblyFormat = [{
+ attr-dict $src `[` $byteSel `]` `,` $scale `:` type($res)
+ }];
+}
+
+
def ROCDL_CvtPkBf8F32Op :
ROCDL_IntrOp<"cvt.pk.bf8.f32", [], [], [Pure], 1>,
Arguments<(ins F32:$srcA, F32:$srcB, I32:$old, I1:$wordSel)> {
diff --git a/mlir/test/Dialect/LLVMIR/rocdl.mlir b/mlir/test/Dialect/LLVMIR/rocdl.mlir
index 962c633a99bb4e..5f99a07f7fdac5 100644
--- a/mlir/test/Dialect/LLVMIR/rocdl.mlir
+++ b/mlir/test/Dialect/LLVMIR/rocdl.mlir
@@ -754,11 +754,18 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
// CHECK-LABEL: @rocdl_8bit_floats
// CHECK: rocdl.cvt.f32.bf8
// CHECK: rocdl.cvt.f32.fp8
+// CHECK: rocdl.cvt.scalef32.f32.bf8
+// CHECK: rocdl.cvt.scalef32.f32.fp8
// CHECK: rocdl.cvt.pk.bf8.f32
// CHECK: rocdl.cvt.pk.fp8.f32
// CHECK: rocdl.cvt.scalef32.pk.fp8.f32
// CHECK: rocdl.cvt.sr.bf8.f32
// CHECK: rocdl.cvt.sr.fp8.f32
+// CHECK: rocdl.cvt.scalef32.sr.fp8.f32
+// CHECK: rocdl.cvt.sr.bf8.f32
+// CHECK: rocdl.cvt.scalef32.sr.bf8.f32
+// CHECK: rocdl.cvt.scalef32.pk.f32.fp8
+// CHECK: rocdl.cvt.scalef32.pk.f32.bf8
%c0 = llvm.mlir.constant(0 : i32) : i32
%c2 = llvm.mlir.constant(2 : i32) : i32
%c3 = llvm.mlir.constant(3 : i32) : i32
@@ -766,6 +773,8 @@ llvm.func @rocdl_8bit_floats(%source: i32, %stoch: i32) -> i32 {
%false = llvm.mlir.constant(false) : i1
%v1 = rocdl.cvt.f32.bf8 %source[%c0] : f32
%v2 = rocdl.cvt.f32.fp8 %source[%c0] : f32
+ %v1_scaled = rocdl.cvt.scalef32.f32.bf8 %source[%c0], %c4 : f32
+ %v2_scaled = rocdl.cvt.scalef32.f32.fp8 %source[%c0], %c4 : f32
%source2 = rocdl.cvt.pk.bf8.f32 %v1, %v2 -> %source[%false] : i32
%source3 = rocdl.cvt.pk.fp8.f32 %v1, %v2 -> %source2[%false] : i32
%source3_scaled = rocdl.cvt.scalef32.pk.fp8.f32 %v1, %v2, %c4 -> %source2[%false] : i32
More information about the Mlir-commits
mailing list