[llvm] [InstSimplify] Enable FAdd simplifications when user can ignore sign bit (PR #157757)

Tue Sep 9 21:51:31 PDT 2025

https://github.com/VedantParanjape updated https://github.com/llvm/llvm-project/pull/157757

>From b230ed786191424bde3eb6411d193e2adf9fe086 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Tue, 9 Sep 2025 17:24:52 -0400
Subject: [PATCH] [InstSimplify] Enable FAdd simplifications when user can
 ignore sign bit

When FAdd result is used by fabs, we can safely ignore the sign bit of
fp zero. This patch enables an instruction simplification optimization
that folds fadd x, 0 ==> x, which would otherwise not work as the
compiler cannot prove that the zero isn't -0. But if the result of the
fadd is used by fabs we can simply ignore this and still do the
optimization.

Fixes #154238
---
 llvm/lib/Analysis/InstructionSimplify.cpp            |  4 +++-
 .../test/CodeGen/AMDGPU/fcanonicalize-elimination.ll |  5 ++---
 .../InstSimplify/fold-fadd-with-zero-gh154238.ll     | 12 ++++++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5907e21065331..c49265d77dd4d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5710,7 +5710,9 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   // fadd X, 0 ==> X, when we know X is not -0
   if (canIgnoreSNaN(ExBehavior, FMF))
     if (match(Op1, m_PosZeroFP()) &&
-        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q)))
+        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q) ||
+         (Q.CxtI && !Q.CxtI->use_empty() &&
+          canIgnoreSignBitOfZero(*(Q.CxtI->use_begin())))))
       return Op0;
 
   if (!isDefaultFPEnvironment(ExBehavior, Rounding))
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index ab51693198a30..1973ede5a62a8 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -363,9 +363,8 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 }
 
 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
-; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
-; GCN-NOT: v_mul
-; GCN-NOT: v_max
+; VI: v_mul_f32_e64 [[V:v[0-9]+]], 1.0, |[[V]]|
+; GFX9: v_max_f32_e64 [[V:v[0-9]+]], |[[V]]|, |[[V]]|
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
new file mode 100644
index 0000000000000..bb12328574dda
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+define float @src(float %arg1) {
+; CHECK-LABEL: define float @src(
+; CHECK-SAME: float [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]])
+; CHECK-NEXT:    ret float [[V3]]
+;
+  %v2 = fadd float %arg1, 0.000000e+00
+  %v3 = call float @llvm.fabs.f32(float %v2)
+  ret float %v3
+}