[Mlir-commits] [mlir] [mlir][Arith] Add FTZ (Flush-to-Zero) fast-math flag (PR #112700)

Thu Oct 17 05:31:45 PDT 2024

https://github.com/chelini created https://github.com/llvm/llvm-project/pull/112700

The Flush to Zero (FTZ) modifier is used in floating-point arithmetic to set very small numbers, known as denormal or subnormal numbers, to zero. FTZ is done to improve performance, as handling these small numbers can slow down computations. Note that this attribute does not specify whether the rounding happens toward positive or negative zero, since that is architecture- (or vendor-) dependent.

>From 98b22fd3c2d90d98a2def8827e2f844e150f0e68 Mon Sep 17 00:00:00 2001
From: lorenzo chelini <lchelini at nvidia.com>
Date: Thu, 17 Oct 2024 14:21:31 +0200
Subject: [PATCH] [mlir][Arith] Add FTZ (Flush-to-Zero) fast-math flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Flush to Zero (FTZ) modifier is used in floating-point arithmetic to set
very small numbers, known as denormal or subnormal numbers, to zero. FTZ is done
to improve performance, as handling these small numbers can slow down
computations. Note that this attribute does not specify whether the rounding
happens toward positive or negative zero, since that is architecture- (or
vendor-) dependent.
---
 mlir/include/mlir/Dialect/Arith/IR/ArithBase.td |  6 ++++--
 mlir/test/Dialect/Arith/ops.mlir                | 16 +++++++++++++++-
 mlir/test/Dialect/Math/ops.mlir                 |  2 +-
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td
index 19a2ade2e95a0e..b7dc984817a333 100644
--- a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td
+++ b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td
@@ -108,12 +108,13 @@ def FASTMATH_NO_SIGNED_ZEROS : I32BitEnumAttrCaseBit<"nsz",      3>;
 def FASTMATH_ALLOW_RECIP     : I32BitEnumAttrCaseBit<"arcp",     4>;
 def FASTMATH_ALLOW_CONTRACT  : I32BitEnumAttrCaseBit<"contract", 5>;
 def FASTMATH_APPROX_FUNC     : I32BitEnumAttrCaseBit<"afn",      6>;
+def FASTMATH_FTZ             : I32BitEnumAttrCaseBit<"ftz",      7>;
 def FASTMATH_FAST            : I32BitEnumAttrCaseGroup<
     "fast",
     [
       FASTMATH_REASSOC,         FASTMATH_NO_NANS,     FASTMATH_NO_INFS,
       FASTMATH_NO_SIGNED_ZEROS, FASTMATH_ALLOW_RECIP, FASTMATH_ALLOW_CONTRACT,
-      FASTMATH_APPROX_FUNC]>;
+      FASTMATH_APPROX_FUNC, FASTMATH_FTZ]>;
 
 def FastMathFlags : I32BitEnumAttr<
     "FastMathFlags",
@@ -121,7 +122,8 @@ def FastMathFlags : I32BitEnumAttr<
     [
       FASTMATH_NONE,           FASTMATH_REASSOC,         FASTMATH_NO_NANS,
       FASTMATH_NO_INFS,        FASTMATH_NO_SIGNED_ZEROS, FASTMATH_ALLOW_RECIP,
-      FASTMATH_ALLOW_CONTRACT, FASTMATH_APPROX_FUNC,     FASTMATH_FAST]> {
+      FASTMATH_ALLOW_CONTRACT, FASTMATH_APPROX_FUNC,     FASTMATH_FTZ,
+      FASTMATH_FAST]> {
   let separator = ",";
   let cppNamespace = "::mlir::arith";
   let genSpecializedAttr = 0;
diff --git a/mlir/test/Dialect/Arith/ops.mlir b/mlir/test/Dialect/Arith/ops.mlir
index f684e02344a517..0209d612273e6d 100644
--- a/mlir/test/Dialect/Arith/ops.mlir
+++ b/mlir/test/Dialect/Arith/ops.mlir
@@ -1127,7 +1127,7 @@ func.func @fastmath(%arg0: f32, %arg1: f32, %arg2: i32) {
 // CHECK: {{.*}} = arith.addf %arg0, %arg1 fastmath<nnan,ninf> : f32
   %7 = arith.addf %arg0, %arg1 fastmath<nnan,ninf> : f32
 // CHECK: {{.*}} = arith.mulf %arg0, %arg1 fastmath<fast> : f32
-  %8 = arith.mulf %arg0, %arg1 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : f32
+  %8 = arith.mulf %arg0, %arg1 fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn,ftz> : f32
 // CHECK: {{.*}} = arith.cmpf oeq, %arg0, %arg1 fastmath<fast> : f32
   %9 = arith.cmpf oeq, %arg0, %arg1 fastmath<fast> : f32
 
@@ -1161,3 +1161,17 @@ func.func @intflags_func(%arg0: i64, %arg1: i64) {
   %3 = arith.shli %arg0, %arg1 overflow<nsw, nuw> : i64
   return
 }
+
+// CHECK-LABEL: flush_to_zero
+// CHECK-SAME: %[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32
+func.func @flush_to_zero(%arg0: f32, %arg1: f32) {
+  // CHECK: %{{.+}} = arith.addf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  // CHECK-NEXT: %{{.+}} = arith.subf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  // CHECK-NEXT: %{{.+}} = arith.mulf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  // CHECK-NEXT: %{{.+}} = arith.divf %[[ARG0]], %[[ARG1]] fastmath<ftz> : f32
+  %0 = arith.addf %arg0, %arg1 fastmath<ftz> : f32
+  %1 = arith.subf %arg0, %arg1 fastmath<ftz> : f32
+  %2 = arith.mulf %arg0, %arg1 fastmath<ftz> : f32
+  %3 = arith.divf %arg0, %arg1 fastmath<ftz> : f32
+  return
+}
diff --git a/mlir/test/Dialect/Math/ops.mlir b/mlir/test/Dialect/Math/ops.mlir
index 7e45d9bc6f74a9..6accd09647b8bb 100644
--- a/mlir/test/Dialect/Math/ops.mlir
+++ b/mlir/test/Dialect/Math/ops.mlir
@@ -289,7 +289,7 @@ func.func @fastmath(%f: f32, %i: i32, %v: vector<4xf32>, %t: tensor<4x4x?xf32>)
   // CHECK: math.trunc %[[F]] fastmath<fast> : f32
   %0 = math.trunc %f fastmath<fast> : f32
   // CHECK: math.powf %[[V]], %[[V]] fastmath<fast> : vector<4xf32>
-  %1 = math.powf %v, %v fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn> : vector<4xf32>
+  %1 = math.powf %v, %v fastmath<reassoc,nnan,ninf,nsz,arcp,contract,afn,ftz> : vector<4xf32>
   // CHECK: math.fma %[[T]], %[[T]], %[[T]] : tensor<4x4x?xf32>
   %2 = math.fma %t, %t, %t fastmath<none> : tensor<4x4x?xf32>
   // CHECK: math.absf %[[F]] fastmath<ninf> : f32