[llvm] [SelectionDAG] Remove UnsafeFPMath check in `visitFADDForFMACombine` (PR #127770)

via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 20 18:05:16 PST 2025


https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/127770

>From 1044329cd31af4762a5c0fb42afa3c86589bf633 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 19 Feb 2025 16:57:20 +0800
Subject: [PATCH 1/5] [SelectionDAG] Remove UnsafeFPMath check in
 `visitFADDForFMACombine`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp        |  4 ++--
 llvm/test/CodeGen/AMDGPU/fdot2.ll                    |  8 ++++----
 llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll        | 12 ++++++------
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll        |  8 ++++----
 llvm/test/CodeGen/NVPTX/fma-relu-contract.ll         | 10 +++++-----
 llvm/test/CodeGen/PowerPC/fmf-propagation.ll         |  4 ++--
 .../CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll   |  2 +-
 llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll           |  2 +-
 llvm/test/CodeGen/X86/sqrt-fastmath.ll               |  6 +++---
 9 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f4caaf426de6a..00f83f4a100d4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16320,8 +16320,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   if (!HasFMAD && !HasFMA)
     return SDValue();
 
-  bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
-                              Options.UnsafeFPMath || HasFMAD);
+  bool AllowFusionGlobally =
+      Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
   // If the addition is not contractable, do not combine.
   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
     return SDValue();
diff --git a/llvm/test/CodeGen/AMDGPU/fdot2.ll b/llvm/test/CodeGen/AMDGPU/fdot2.ll
index 776816d6aa0e3..15e161d718e80 100644
--- a/llvm/test/CodeGen/AMDGPU/fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdot2.ll
@@ -1,7 +1,7 @@
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign -fp-contract=fast -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX900
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -fp-contract=fast -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX906-DL-UNSAFE
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -denormal-fp-math-f32=preserve-sign -fp-contract=fast -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -denormal-fp-math-f32=preserve-sign -fp-contract=fast -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GCN-DL-UNSAFE,GFX10-DL-UNSAFE,GFX10-CONTRACT
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math-f32=preserve-sign -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=preserve-sign -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-CONTRACT
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck %s  -check-prefixes=GCN,GFX906-DENORM-CONTRACT
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index b32630a97b3ad..610cda4933adf 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
 
-; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-NSZ %s
 
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ %s
 
 ; --------------------------------------------------------------------------------
 ; fadd tests
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 9a72fe96b5c3a..2ae2047d08a68 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s
 
-; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
-; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
+; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
 
 ; --------------------------------------------------------------------------------
 ; fadd tests
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
index 7dce894620e6b..eb8538c791a37 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_80 -fp-contract=fast | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_80 -fp-contract=fast | %ptxas-verify -arch=sm_80 %}
 
 ; Using FTZ should emit fma.ftz.relu for f16, not for bf16
-; RUN: llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | FileCheck %s --check-prefixes=CHECK-FTZ
-; RUN: %if ptxas %{ llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
+; RUN: llc < %s -denormal-fp-math-f32=preserve-sign -mtriple=nvptx64 -mcpu=sm_80 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK-FTZ
+; RUN: %if ptxas %{ llc < %s -denormal-fp-math-f32=preserve-sign -fp-contract=fast -mtriple=nvptx64 -mcpu=sm_80 | %ptxas-verify -arch=sm_80 %}
 
 ; SM < 80 or (which needs PTX version >= 70) should not emit fma{.ftz}.relu
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s --check-prefixes=CHECK-SM70
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK-SM70
 
 define half @fma_f16_expanded_no_nans(half %a, half %b, half %c) #0 {
 ; CHECK-LABEL: fma_f16_expanded_no_nans(
diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
index 4e72a5ac5ede3..e71f59c79ce4d 100644
--- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -2,8 +2,8 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1                        | FileCheck %s --check-prefix=FMFDEBUG
 ; RUN: llc < %s -mtriple=powerpc64le                                                           | FileCheck %s --check-prefix=FMF
-; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
-; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
+; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
+; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -fp-contract=fast -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
 
 ; Test FP transforms using instruction/node-level fast-math-flags.
 ; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
index 96aa58000f9b7..8999be58a3581 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -fp-contract=fast < %s | FileCheck %s
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 2c7da100344b7..5b7f70180caa6 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel -fp-contract=fast 2>&1 | FileCheck %s
 
 declare float @llvm.sqrt.f32(float) #2
 
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 384f8b832afb9..fed87d55b7e3f 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64--linux-gnu -mcpu=x86-64 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64--linux-gnu -mcpu=x86-64 -mattr=+avx  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
-; RUN: llc < %s -mtriple=x86_64--linux-gnu -mcpu=x86-64 -mattr=+avx512f  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
+; RUN: llc < %s -mtriple=x86_64--linux-gnu -mcpu=x86-64 -mattr=+sse2 -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64--linux-gnu -mcpu=x86-64 -mattr=+avx -fp-contract=fast  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64--linux-gnu -mcpu=x86-64 -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
 
 declare double @__sqrt_finite(double)
 declare float @__sqrtf_finite(float)

>From 311d52e5864c6e73ed94a4bfa781f71fc679c659 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 19 Feb 2025 18:10:10 +0800
Subject: [PATCH 2/5] Drop "unsafe-fp-math" in some CodeGen tests. Regression
 failures are expected...

---
 llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll        |  8 ++++----
 llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll        |  2 +-
 llvm/test/CodeGen/NVPTX/fma-relu-contract.ll         |  4 ++--
 .../CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll   |  2 +-
 llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll           |  4 ++--
 llvm/test/CodeGen/X86/sqrt-fastmath.ll               | 12 ++++++------
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index 610cda4933adf..b466780e94237 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
 ; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
 
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
 ; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-NSZ %s
 
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE %s
 ; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ %s
 
 ; --------------------------------------------------------------------------------
@@ -4758,6 +4758,6 @@ declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
 
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind }
 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 2ae2047d08a68..56b79e05823d6 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -3440,6 +3440,6 @@ declare half @llvm.amdgcn.rcp.f16(half) #1
 
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind "unsafe-fp-math"="true" }
+attributes #2 = { nounwind }
 attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
index eb8538c791a37..e433c58e86024 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
@@ -1221,5 +1221,5 @@ define <2 x bfloat> @fma_bf16x2_expanded_maxnum_no_nans(<2 x bfloat> %a, <2 x bf
   ret <2 x bfloat> %3
 }
 
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" }
+attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #1 = { }
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
index 8999be58a3581..539b563691723 100644
--- a/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-mutate-trivial-copy.ll
@@ -31,7 +31,7 @@ declare double @llvm.sqrt.f64(double) #1
 
 declare signext i32 @p_col_helper(...) #2
 
-attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" "unsafe-fp-math"="true" }
+attributes #0 = { nounwind "no-infs-fp-math"="true" "no-nans-fp-math"="true" "target-cpu"="pwr7" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
 
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
index 5b7f70180caa6..df0b2e1185e07 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll
@@ -144,6 +144,6 @@ define float @rsqrt_daz(float %f) #1 {
   ret float %div
 }
 
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" }
-attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #0 = { "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" }
+attributes #1 = { "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index fed87d55b7e3f..00ee932e73eb8 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -1005,10 +1005,10 @@ define double @sqrt_simplify_before_recip_order(double %x, ptr %p) nounwind {
   ret double %sqrt_fast
 }
 
-attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
-attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
+attributes #0 = { "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
+attributes #1 = { "reciprocal-estimates"="sqrt,vec-sqrt" }
 attributes #2 = { nounwind readnone }
-attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" }
-attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
-attributes #5 = { "unsafe-fp-math"="true" "reciprocal-estimates"="all:0" }
-attributes #6 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" }
+attributes #3 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,ieee" }
+attributes #4 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }
+attributes #5 = { "reciprocal-estimates"="all:0" }
+attributes #6 = { "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="preserve-sign,dynamic" }

>From d4a11341fdc8250b1cef2993355cfc857cc3b96c Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Wed, 19 Feb 2025 20:54:07 +0800
Subject: [PATCH 3/5] drop empty attribute group

---
 llvm/test/CodeGen/NVPTX/fma-relu-contract.ll | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
index e433c58e86024..84508364b14d7 100644
--- a/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
+++ b/llvm/test/CodeGen/NVPTX/fma-relu-contract.ll
@@ -119,7 +119,7 @@ define half @fma_f16_expanded_no_nans_multiple_uses_of_fma(half %a, half %b, hal
   ret half %6
 }
 
-define half @fma_f16_expanded_unsafe_with_nans(half %a, half %b, half %c) #1 {
+define half @fma_f16_expanded_unsafe_with_nans(half %a, half %b, half %c) {
 ; CHECK-LABEL: fma_f16_expanded_unsafe_with_nans(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b16 %rs<7>;
@@ -216,7 +216,7 @@ define half @fma_f16_expanded_maxnum_no_nans(half %a, half %b, half %c) #0 {
   ret half %3
 }
 
-define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %c) #1 {
+define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %c) {
 ; CHECK-LABEL: fma_bf16_expanded_unsafe_with_nans(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b16 %rs<7>;
@@ -647,7 +647,7 @@ define <2 x half> @fma_f16x2_expanded_no_nans_multiple_uses_of_fma(<2 x half> %a
   ret <2 x half> %6
 }
 
-define <2 x half> @fma_f16x2_expanded_unsafe_with_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) #1 {
+define <2 x half> @fma_f16x2_expanded_unsafe_with_nans(<2 x half> %a, <2 x half> %b, <2 x half> %c) {
 ; CHECK-LABEL: fma_f16x2_expanded_unsafe_with_nans(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<7>;
@@ -754,7 +754,7 @@ define <2 x half> @fma_f16x2_expanded_maxnum_no_nans(<2 x half> %a, <2 x half> %
   ret <2 x half> %3
 }
 
-define <2 x bfloat> @fma_bf16x2_expanded_unsafe_with_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) #1 {
+define <2 x bfloat> @fma_bf16x2_expanded_unsafe_with_nans(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bfloat> %c) {
 ; CHECK-LABEL: fma_bf16x2_expanded_unsafe_with_nans(
 ; CHECK:       {
 ; CHECK-NEXT:    .reg .b32 %r<7>;
@@ -1222,4 +1222,3 @@ define <2 x bfloat> @fma_bf16x2_expanded_maxnum_no_nans(<2 x bfloat> %a, <2 x bf
 }
 
 attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
-attributes #1 = { }

>From db148eb14088efb0b795e72c7557e50fea868358 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Thu, 20 Feb 2025 20:15:28 +0800
Subject: [PATCH 4/5] fold constant in visitFMUL

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 00f83f4a100d4..c15bc7709e51d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17387,6 +17387,17 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
     return C;
 
+  // fold (fmul N0, 0.0) --> 0.0 when nsz, nnan and ninf all hold
+  if (DAG.getMachineFunction()
+          .getFunction()
+          .getFnAttribute("no-signed-zeros-fp-math")
+          .getValueAsBool() ||
+      Flags.hasNoSignedZeros()) {
+    ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
+    if (N1C && N1C->isZero() && Flags.hasNoNaNs() && Flags.hasNoInfs())
+      return N1;
+  }
+
   // canonicalize constant to RHS
   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
      !DAG.isConstantFPBuildVectorOrConstantFP(N1))

>From e1481f7eed872557145296a013ed58f175cb3e93 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Fri, 21 Feb 2025 10:04:55 +0800
Subject: [PATCH 5/5] Check fast math related flags

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  3 ++-
 llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll | 23 ++++++++++++-------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c15bc7709e51d..3de75f3c1dd38 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17564,7 +17564,8 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
   }
 
   // FIXME: use fast math flags instead of Options.UnsafeFPMath
-  if (Options.UnsafeFPMath) {
+  if (Options.AllowFPOpFusion == FPOpFusion::Fast ||
+      Options.NoSignedZerosFPMath) {
     if (N0CFP && N0CFP->isZero())
       return N2;
     if (N1CFP && N1CFP->isZero())
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index b466780e94237..38bac6d5b8699 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
 ; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global -fp-contract=fast < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
 
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
@@ -623,10 +623,13 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
 ;
 ; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
 ; VI-SAFE:       ; %bb.0: ; %.entry
-; VI-SAFE-NEXT:    v_mov_b32_e32 v0, 0x8000
+; VI-SAFE-NEXT:    v_rcp_f16_e32 v0, s1
 ; VI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
-; VI-SAFE-NEXT:    v_cmp_ngt_f16_e64 vcc, s0, 0
-; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
+; VI-SAFE-NEXT:    v_mul_f16_e32 v0, 0, v0
+; VI-SAFE-NEXT:    v_add_f16_e32 v0, 0, v0
+; VI-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
+; VI-SAFE-NEXT:    v_cmp_ngt_f16_e32 vcc, s0, v0
+; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
 ; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7e00
 ; VI-SAFE-NEXT:    v_cmp_nlt_f16_e32 vcc, 0, v0
 ; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
@@ -646,12 +649,16 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
 ;
 ; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
 ; GFX11-SAFE:       ; %bb.0: ; %.entry
-; GFX11-SAFE-NEXT:    v_mov_b32_e32 v0, s0
-; GFX11-SAFE-NEXT:    v_cmp_ngt_f16_e64 vcc_lo, s0, 0
+; GFX11-SAFE-NEXT:    v_rcp_f16_e32 v0, s1
+; GFX11-SAFE-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-SAFE-NEXT:    v_mul_f16_e32 v0, 0, v0
+; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-SAFE-NEXT:    v_add_f16_e32 v0, 0, v0
+; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
+; GFX11-SAFE-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, s0, v0
 ; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
+; GFX11-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, s0, vcc_lo
 ; GFX11-SAFE-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0
-; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX11-SAFE-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
 ; GFX11-SAFE-NEXT:    ; return to shader part epilog
 ;



More information about the llvm-commits mailing list