[Mlir-commits] [mlir] [mlir][ArithToAMDGPU] limit scaling truncf/extf support to gfx950 (PR #155431)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Aug 26 09:01:51 PDT 2025
https://github.com/Muzammiluddin-Syed-ECE updated https://github.com/llvm/llvm-project/pull/155431
From 5464787f7cf9f0b60bf0602059eb41fb42efb227 Mon Sep 17 00:00:00 2001
From: Muzammiluddin Syed <muzasyed at amd.com>
Date: Tue, 26 Aug 2025 07:57:14 -0700
Subject: [PATCH 1/2] limit scaling truncf/extf support to gfx950
Signed-off-by: Muzammiluddin Syed <muzasyed at amd.com>
---
mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
index 8230591123661..c0c063b4d4923 100644
--- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
+++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
@@ -702,7 +702,7 @@ void mlir::arith::populateArithToAMDGPUConversionPatterns(
if (allowPackedF16Rtz)
patterns.add<TruncfToFloat16RewritePattern>(patterns.getContext(), benefit);
- if (chipset >= kGfx950) {
+ if (chipset == kGfx950) {
patterns.add<ScalingExtFRewritePattern>(patterns.getContext(), benefit);
patterns.add<ScalingTruncFRewritePattern>(patterns.getContext(), benefit);
}
From 2f2a4000ec23a0bdfafa0b517b0bae07765d32da Mon Sep 17 00:00:00 2001
From: Muzammiluddin Syed <muzasyed at amd.com>
Date: Tue, 26 Aug 2025 15:58:22 +0000
Subject: [PATCH 2/2] hoist chipset check to variable
Signed-off-by: Muzammiluddin Syed <muzasyed at amd.com>
---
.../mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h | 10 ++++------
mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp | 9 +++++----
2 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h
index f4a9518839224..fd144edf77452 100644
--- a/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h
+++ b/mlir/include/mlir/Conversion/ArithToAMDGPU/ArithToAMDGPU.h
@@ -28,12 +28,10 @@ namespace arith {
/// is set, values outside the range of the destination type are clamped
/// to the largest value of that type instead of being rewritten to Inf (aka
/// NaN).
-void populateArithToAMDGPUConversionPatterns(RewritePatternSet &patterns,
- bool convertFP8Arithmetic,
- bool saturateFP8Truncf,
- bool allowPackedF16Rtz,
- amdgpu::Chipset chipset,
- PatternBenefit benefit = 1);
+void populateArithToAMDGPUConversionPatterns(
+ RewritePatternSet &patterns, bool convertFP8Arithmetic,
+ bool saturateFP8Truncf, bool allowPackedF16Rtz, bool supportsScaledExtTrunc,
+ amdgpu::Chipset chipset, PatternBenefit benefit = 1);
} // namespace arith
} // namespace mlir
diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
index c0c063b4d4923..3d6f6cab42244 100644
--- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
+++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp
@@ -690,8 +690,8 @@ ScalingTruncFRewritePattern::matchAndRewrite(arith::ScalingTruncFOp op,
void mlir::arith::populateArithToAMDGPUConversionPatterns(
RewritePatternSet &patterns, bool convertFP8Arithmetic,
- bool saturateFP8Truncf, bool allowPackedF16Rtz, Chipset chipset,
- PatternBenefit benefit) {
+ bool saturateFP8Truncf, bool allowPackedF16Rtz, bool supportsScaledExtTrunc,
+ Chipset chipset, PatternBenefit benefit) {
if (convertFP8Arithmetic) {
patterns.add<ExtFOnFloat8RewritePattern>(patterns.getContext(), chipset,
@@ -702,7 +702,7 @@ void mlir::arith::populateArithToAMDGPUConversionPatterns(
if (allowPackedF16Rtz)
patterns.add<TruncfToFloat16RewritePattern>(patterns.getContext(), benefit);
- if (chipset == kGfx950) {
+ if (supportsScaledExtTrunc) {
patterns.add<ScalingExtFRewritePattern>(patterns.getContext(), benefit);
patterns.add<ScalingTruncFRewritePattern>(patterns.getContext(), benefit);
}
@@ -720,9 +720,10 @@ void ArithToAMDGPUConversionPass::runOnOperation() {
bool convertFP8Arithmetic =
*maybeChipset == kGfx942 || hasOcpFp8(*maybeChipset);
+ bool supportsScaledExtTrunc = *maybeChipset == kGfx950;
arith::populateArithToAMDGPUConversionPatterns(
patterns, convertFP8Arithmetic, saturateFP8Truncf, allowPackedF16Rtz,
- *maybeChipset);
+ supportsScaledExtTrunc, *maybeChipset);
if (failed(applyPatternsGreedily(op, std::move(patterns))))
return signalPassFailure();
}
More information about the Mlir-commits mailing list