[llvm] [InstCombine] Enable FAdd simplifications when user can ignore sign bit (PR #157757)

Thu Sep 11 14:52:46 PDT 2025

https://github.com/VedantParanjape updated https://github.com/llvm/llvm-project/pull/157757

>From b230ed786191424bde3eb6411d193e2adf9fe086 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Tue, 9 Sep 2025 17:24:52 -0400
Subject: [PATCH 1/4] [InstSimplify] Enable FAdd simplifications when user can
 ignore sign bit

When the result of an fadd is used by fabs, we can safely ignore the sign
bit of a floating-point zero. This patch enables the instruction
simplification that folds fadd x, 0 ==> x, which would otherwise not
apply because the compiler cannot prove that x is not -0 (under the
default rounding mode, fadd -0, +0 yields +0, so the fold would change
the sign of the result). If the result of the fadd is used by fabs, that
sign difference is discarded, so the fold is still valid.

Fixes #154238
---
 llvm/lib/Analysis/InstructionSimplify.cpp            |  4 +++-
 .../test/CodeGen/AMDGPU/fcanonicalize-elimination.ll |  5 ++---
 .../InstSimplify/fold-fadd-with-zero-gh154238.ll     | 12 ++++++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5907e21065331..c49265d77dd4d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5710,7 +5710,9 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   // fadd X, 0 ==> X, when we know X is not -0
   if (canIgnoreSNaN(ExBehavior, FMF))
     if (match(Op1, m_PosZeroFP()) &&
-        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q)))
+        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q) ||
+         (Q.CxtI && !Q.CxtI->use_empty() &&
+          canIgnoreSignBitOfZero(*(Q.CxtI->use_begin())))))
       return Op0;
 
   if (!isDefaultFPEnvironment(ExBehavior, Rounding))
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index ab51693198a30..1973ede5a62a8 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -363,9 +363,8 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 }
 
 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
-; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
-; GCN-NOT: v_mul
-; GCN-NOT: v_max
+; VI: v_mul_f32_e64 [[V:v[0-9]+]], 1.0, |[[V]]|
+; GFX9: v_max_f32_e64 [[V:v[0-9]+]], |[[V]]|, |[[V]]|
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
new file mode 100644
index 0000000000000..bb12328574dda
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+define float @src(float %arg1) {
+; CHECK-LABEL: define float @src(
+; CHECK-SAME: float [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]])
+; CHECK-NEXT:    ret float [[V3]]
+;
+  %v2 = fadd float %arg1, 0.000000e+00
+  %v3 = call float @llvm.fabs.f32(float %v2)
+  ret float %v3
+}

>From 3aaf1557e3f70ba099a1a3d378b2f82d2b6ec89b Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Wed, 10 Sep 2025 14:19:51 -0400
Subject: [PATCH 2/4] Move the optimization from InstSimplify to InstCombine

---
 llvm/lib/Analysis/InstructionSimplify.cpp              |  4 +---
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp  | 10 ++++++++++
 .../fold-fadd-with-zero-gh154238.ll                    |  2 +-
 3 files changed, 12 insertions(+), 4 deletions(-)
 rename llvm/test/Transforms/{InstSimplify => InstCombine}/fold-fadd-with-zero-gh154238.ll (88%)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c49265d77dd4d..5907e21065331 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5710,9 +5710,7 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   // fadd X, 0 ==> X, when we know X is not -0
   if (canIgnoreSNaN(ExBehavior, FMF))
     if (match(Op1, m_PosZeroFP()) &&
-        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q) ||
-         (Q.CxtI && !Q.CxtI->use_empty() &&
-          canIgnoreSignBitOfZero(*(Q.CxtI->use_begin())))))
+        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q)))
       return Op0;
 
   if (!isDefaultFPEnvironment(ExBehavior, Rounding))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d934638c15e75..3705dde993f2b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2002,6 +2002,16 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
     return FoldedFAdd;
 
+  // B = fadd A, 0
+  // Z = Op B
+  // can be transformed into
+  // Z = Op A
+  // Where Op is such that we can ignore sign of 0 in fadd
+  Value *A;
+  if (match(&I, m_c_FAdd(m_Value(A), m_AnyZeroFP())) && !I.use_empty() &&
+      canIgnoreSignBitOfZero(*I.use_begin()))
+    return replaceInstUsesWith(I, A);
+
   // (-X) + Y --> Y - X
   Value *X, *Y;
   if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y))))
diff --git a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
similarity index 88%
rename from llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
rename to llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
index bb12328574dda..dc2e3385a42b6 100644
--- a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 define float @src(float %arg1) {
 ; CHECK-LABEL: define float @src(
 ; CHECK-SAME: float [[ARG1:%.*]]) {

>From 717c2c88361fb4da77679b0dc938166226cbb05b Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Wed, 10 Sep 2025 15:17:02 -0400
Subject: [PATCH 3/4] revert fcanonicalize testcase

---
 llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 1973ede5a62a8..ab51693198a30 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -363,8 +363,9 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 }
 
 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
-; VI: v_mul_f32_e64 [[V:v[0-9]+]], 1.0, |[[V]]|
-; GFX9: v_max_f32_e64 [[V:v[0-9]+]], |[[V]]|, |[[V]]|
+; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
+; GCN-NOT: v_mul
+; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()

>From 486bbe7322a8b23f13cc8464d59e485b038ab593 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Thu, 11 Sep 2025 17:43:02 -0400
Subject: [PATCH 4/4] rework to check for a single use, and update the testcase

---
 .../Transforms/InstCombine/InstCombineAddSub.cpp   |  2 +-
 .../InstCombine/fold-fadd-with-zero-gh154238.ll    | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3705dde993f2b..6dfc6e11621d1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2008,7 +2008,7 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   // Z = Op A
   // Where Op is such that we can ignore sign of 0 in fadd
   Value *A;
-  if (match(&I, m_c_FAdd(m_Value(A), m_AnyZeroFP())) && !I.use_empty() &&
+  if (match(&I, m_OneUse(m_c_FAdd(m_Value(A), m_AnyZeroFP()))) &&
       canIgnoreSignBitOfZero(*I.use_begin()))
     return replaceInstUsesWith(I, A);
 
diff --git a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
index dc2e3385a42b6..f9f0ca8a08bcb 100644
--- a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
@@ -10,3 +10,17 @@ define float @src(float %arg1) {
   %v3 = call float @llvm.fabs.f32(float %v2)
   ret float %v3
 }
+
+define float @src2(float %arg1) {
+; CHECK-LABEL: define float @src2(
+; CHECK-SAME: float [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V2:%.*]] = fadd float [[ARG1]], 0.000000e+00
+; CHECK-NEXT:    [[V3:%.*]] = call float @llvm.fabs.f32(float [[V2]])
+; CHECK-NEXT:    [[V4:%.*]] = fsub float [[V2]], [[V3]]
+; CHECK-NEXT:    ret float [[V4]]
+;
+  %v2 = fadd float %arg1, 0.000000e+00
+  %v3 = call float @llvm.fabs.f32(float %v2)
+  %v4 = fsub float %v2, %v3
+  ret float %v4
+}