[llvm] [InstCombine] Enable FAdd simplifications when user can ignore sign bit (PR #157757)

Thu Sep 11 19:56:04 PDT 2025

https://github.com/VedantParanjape updated https://github.com/llvm/llvm-project/pull/157757

>From b230ed786191424bde3eb6411d193e2adf9fe086 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Tue, 9 Sep 2025 17:24:52 -0400
Subject: [PATCH 1/6] [InstSimplify] Enable FAdd simplifications when user can
 ignore sign bit

When the result of an FAdd is used by fabs, we can safely ignore the sign
bit of a floating-point zero. This patch enables an instruction
simplification that folds fadd x, 0 ==> x, which would otherwise not
apply because the compiler cannot prove that x isn't -0 (-0 + 0 yields
+0, not -0). But if the result of the fadd is used by fabs, we can
simply ignore this difference and still do the optimization.

Fixes #154238
---
 llvm/lib/Analysis/InstructionSimplify.cpp            |  4 +++-
 .../test/CodeGen/AMDGPU/fcanonicalize-elimination.ll |  5 ++---
 .../InstSimplify/fold-fadd-with-zero-gh154238.ll     | 12 ++++++++++++
 3 files changed, 17 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5907e21065331..c49265d77dd4d 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5710,7 +5710,9 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   // fadd X, 0 ==> X, when we know X is not -0
   if (canIgnoreSNaN(ExBehavior, FMF))
     if (match(Op1, m_PosZeroFP()) &&
-        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q)))
+        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q) ||
+         (Q.CxtI && !Q.CxtI->use_empty() &&
+          canIgnoreSignBitOfZero(*(Q.CxtI->use_begin())))))
       return Op0;
 
   if (!isDefaultFPEnvironment(ExBehavior, Rounding))
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index ab51693198a30..1973ede5a62a8 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -363,9 +363,8 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 }
 
 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
-; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
-; GCN-NOT: v_mul
-; GCN-NOT: v_max
+; VI: v_mul_f32_e64 [[V:v[0-9]+]], 1.0, |[[V]]|
+; GFX9: v_max_f32_e64 [[V:v[0-9]+]], |[[V]]|, |[[V]]|
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
new file mode 100644
index 0000000000000..bb12328574dda
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+define float @src(float %arg1) {
+; CHECK-LABEL: define float @src(
+; CHECK-SAME: float [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]])
+; CHECK-NEXT:    ret float [[V3]]
+;
+  %v2 = fadd float %arg1, 0.000000e+00
+  %v3 = call float @llvm.fabs.f32(float %v2)
+  ret float %v3
+}

>From 3aaf1557e3f70ba099a1a3d378b2f82d2b6ec89b Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Wed, 10 Sep 2025 14:19:51 -0400
Subject: [PATCH 2/6] Move the optimization from InstSimplify to InstCombine

---
 llvm/lib/Analysis/InstructionSimplify.cpp              |  4 +---
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp  | 10 ++++++++++
 .../fold-fadd-with-zero-gh154238.ll                    |  2 +-
 3 files changed, 12 insertions(+), 4 deletions(-)
 rename llvm/test/Transforms/{InstSimplify => InstCombine}/fold-fadd-with-zero-gh154238.ll (88%)

diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c49265d77dd4d..5907e21065331 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5710,9 +5710,7 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
   // fadd X, 0 ==> X, when we know X is not -0
   if (canIgnoreSNaN(ExBehavior, FMF))
     if (match(Op1, m_PosZeroFP()) &&
-        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q) ||
-         (Q.CxtI && !Q.CxtI->use_empty() &&
-          canIgnoreSignBitOfZero(*(Q.CxtI->use_begin())))))
+        (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q)))
       return Op0;
 
   if (!isDefaultFPEnvironment(ExBehavior, Rounding))
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d934638c15e75..3705dde993f2b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2002,6 +2002,16 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
     return FoldedFAdd;
 
+  // B = fadd A, 0
+  // Z = Op B
+  // can be transformed into
+  // Z = Op A
+  // Where Op is such that we can ignore sign of 0 in fadd
+  Value *A;
+  if (match(&I, m_c_FAdd(m_Value(A), m_AnyZeroFP())) && !I.use_empty() &&
+      canIgnoreSignBitOfZero(*I.use_begin()))
+    return replaceInstUsesWith(I, A);
+
   // (-X) + Y --> Y - X
   Value *X, *Y;
   if (match(&I, m_c_FAdd(m_FNeg(m_Value(X)), m_Value(Y))))
diff --git a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
similarity index 88%
rename from llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
rename to llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
index bb12328574dda..dc2e3385a42b6 100644
--- a/llvm/test/Transforms/InstSimplify/fold-fadd-with-zero-gh154238.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
 define float @src(float %arg1) {
 ; CHECK-LABEL: define float @src(
 ; CHECK-SAME: float [[ARG1:%.*]]) {

>From 717c2c88361fb4da77679b0dc938166226cbb05b Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Wed, 10 Sep 2025 15:17:02 -0400
Subject: [PATCH 3/6] revert fcanonicalize testcase

---
 llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index 1973ede5a62a8..ab51693198a30 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -363,8 +363,9 @@ define amdgpu_kernel void @test_no_fold_canonicalize_fcopysign_value_f32(ptr add
 }
 
 ; GCN-LABEL: test_fold_canonicalize_fabs_value_f32:
-; VI: v_mul_f32_e64 [[V:v[0-9]+]], 1.0, |[[V]]|
-; GFX9: v_max_f32_e64 [[V:v[0-9]+]], |[[V]]|, |[[V]]|
+; GCN: v_and_b32_e32 [[V:v[0-9]+]], 0x7fffffff, v{{[0-9]+}}
+; GCN-NOT: v_mul
+; GCN-NOT: v_max
 ; GCN: {{flat|global}}_store_dword v{{.+}}, [[V]]
 define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1) %arg) {
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()

>From 486bbe7322a8b23f13cc8464d59e485b038ab593 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Thu, 11 Sep 2025 17:43:02 -0400
Subject: [PATCH 4/6] rework to check for a single use, and update the testcase

---
 .../Transforms/InstCombine/InstCombineAddSub.cpp   |  2 +-
 .../InstCombine/fold-fadd-with-zero-gh154238.ll    | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3705dde993f2b..6dfc6e11621d1 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2008,7 +2008,7 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   // Z = Op A
   // Where Op is such that we can ignore sign of 0 in fadd
   Value *A;
-  if (match(&I, m_c_FAdd(m_Value(A), m_AnyZeroFP())) && !I.use_empty() &&
+  if (match(&I, m_OneUse(m_c_FAdd(m_Value(A), m_AnyZeroFP()))) &&
       canIgnoreSignBitOfZero(*I.use_begin()))
     return replaceInstUsesWith(I, A);
 
diff --git a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
index dc2e3385a42b6..f9f0ca8a08bcb 100644
--- a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
@@ -10,3 +10,17 @@ define float @src(float %arg1) {
   %v3 = call float @llvm.fabs.f32(float %v2)
   ret float %v3
 }
+
+define float @src2(float %arg1) {
+; CHECK-LABEL: define float @src2(
+; CHECK-SAME: float [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V2:%.*]] = fadd float [[ARG1]], 0.000000e+00
+; CHECK-NEXT:    [[V3:%.*]] = call float @llvm.fabs.f32(float [[V2]])
+; CHECK-NEXT:    [[V4:%.*]] = fsub float [[V2]], [[V3]]
+; CHECK-NEXT:    ret float [[V4]]
+;
+  %v2 = fadd float %arg1, 0.000000e+00
+  %v3 = call float @llvm.fabs.f32(float %v2)
+  %v4 = fsub float %v2, %v3
+  ret float %v4
+}

>From 9ff60ee6805f424626b10673c6174df1b1f7519e Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Thu, 11 Sep 2025 18:22:42 -0400
Subject: [PATCH 5/6] Add support for FSub as well

---
 .../lib/Transforms/InstCombine/InstCombineAddSub.cpp | 12 +++++++++++-
 .../InstCombine/fold-fadd-with-zero-gh154238.ll      | 11 +++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 6dfc6e11621d1..c45881959a37c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2002,7 +2002,7 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
     return FoldedFAdd;
 
-  // B = fadd A, 0
+  // B = fadd A, 0.0
   // Z = Op B
   // can be transformed into
   // Z = Op A
@@ -3125,6 +3125,16 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
   Value *X, *Y;
   Constant *C;
 
+  // B = fsub A, 0.0
+  // Z = Op B
+  // can be transformed into
+  // Z = Op A
+  // Where Op is such that we can ignore sign of 0 in fsub
+  Value *A;
+  if (match(&I, m_OneUse(m_FSub(m_Value(A), m_AnyZeroFP()))) &&
+      canIgnoreSignBitOfZero(*I.use_begin()))
+    return replaceInstUsesWith(I, A);
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   // If Op0 is not -0.0 or we can ignore -0.0: Z - (X - Y) --> Z + (Y - X)
   // Canonicalize to fadd to make analysis easier.
diff --git a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
index f9f0ca8a08bcb..b9d951dc2945a 100644
--- a/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
+++ b/llvm/test/Transforms/InstCombine/fold-fadd-with-zero-gh154238.ll
@@ -24,3 +24,14 @@ define float @src2(float %arg1) {
   %v4 = fsub float %v2, %v3
   ret float %v4
 }
+
+define float @src_sub(float %arg1) {
+; CHECK-LABEL: define float @src_sub(
+; CHECK-SAME: float [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V3:%.*]] = call float @llvm.fabs.f32(float [[ARG1]])
+; CHECK-NEXT:    ret float [[V3]]
+;
+  %v2 = fsub float %arg1, 0.000000e+00
+  %v3 = call float @llvm.fabs.f32(float %v2)
+  ret float %v3
+}

>From 1dd1c852bb16de36ab05615a069eb258cd59046d Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedantparanjape160201 at gmail.com>
Date: Thu, 11 Sep 2025 22:55:54 -0400
Subject: [PATCH 6/6] Update
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Co-authored-by: Yingwei Zheng <dtcxzyw at qq.com>
---
 llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index c45881959a37c..4962fae61c471 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2008,7 +2008,7 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   // Z = Op A
   // Where Op is such that we can ignore sign of 0 in fadd
   Value *A;
-  if (match(&I, m_OneUse(m_c_FAdd(m_Value(A), m_AnyZeroFP()))) &&
+  if (match(&I, m_OneUse(m_FAdd(m_Value(A), m_AnyZeroFP()))) &&
       canIgnoreSignBitOfZero(*I.use_begin()))
     return replaceInstUsesWith(I, A);