[llvm] d13947b - [InstCombine] Enable more fabs fold when the user ignores sign bit of zero/NaN (#139861)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 21 08:50:03 PDT 2025
Author: Yingwei Zheng
Date: 2025-05-21T23:50:00+08:00
New Revision: d13947bd6caf442151a4b3f51e3e8b226c490535
URL: https://github.com/llvm/llvm-project/commit/d13947bd6caf442151a4b3f51e3e8b226c490535
DIFF: https://github.com/llvm/llvm-project/commit/d13947bd6caf442151a4b3f51e3e8b226c490535.diff
LOG: [InstCombine] Enable more fabs fold when the user ignores sign bit of zero/NaN (#139861)
When the only user of the select is an fcmp or an FP operation with nnan/nsz,
the sign bit of zero/NaN can be ignored.
Alive2: https://alive2.llvm.org/ce/z/ZcxeIv
Compile-time impact:
https://llvm-compile-time-tracker.com/compare.php?from=7add1bcd02b1f72d580bb2e64a1fe4a8bdc085d9&to=cb419c7cbddce778673f3d4b414ed9b8064b8d6e&stat=instructions:u
Closes https://github.com/llvm/llvm-project/issues/133367.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/test/Transforms/InstCombine/fabs.ll
llvm/test/Transforms/InstCombine/fneg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b5a40892694c1..2ef233bc25d72 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2773,6 +2773,47 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
return nullptr;
}
+/// Return true if the sign bit of the result of \p I can be ignored when that
+/// result is zero: \p I has nsz, or its sole user is an fcmp or an nsz FP op.
+static bool ignoreSignBitOfZero(Instruction &I) {
+ if (I.hasNoSignedZeros())
+ return true;
+
+ // Otherwise the only user must ignore the sign bit; give up on multi-use.
+ if (!I.hasOneUse())
+ return false;
+ Instruction *User = I.user_back();
+
+ // fcmp treats both positive and negative zero as equal.
+ if (User->getOpcode() == Instruction::FCmp)
+ return true;
+ // A user that is an FP-math op with nsz also ignores the sign of zero.
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User))
+ return FPOp->hasNoSignedZeros();
+
+ return false;
+}
+
+/// Return true if the sign bit of \p I's result can be ignored when it is NaN.
+static bool ignoreSignBitOfNaN(Instruction &I) {
+ if (I.hasNoNaNs())
+ return true;
+
+ // Otherwise the only user must ignore the sign bit; give up on multi-use.
+ if (!I.hasOneUse())
+ return false;
+ Instruction *User = I.user_back();
+
+ // fcmp ignores the sign bit of NaN.
+ if (User->getOpcode() == Instruction::FCmp)
+ return true;
+ // A user that is an FP-math op with nnan also ignores the sign of NaN.
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User))
+ return FPOp->hasNoNaNs();
+
+ return false;
+}
+
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
// fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
@@ -2797,7 +2838,7 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
// of NAN, but IEEE-754 specifies the signbit of NAN values with
// fneg/fabs operations.
if (match(TrueVal, m_FSub(m_PosZeroFP(), m_Specific(X))) &&
- (cast<FPMathOperator>(CondVal)->hasNoNaNs() || SI.hasNoNaNs() ||
+ (cast<FPMathOperator>(CondVal)->hasNoNaNs() || ignoreSignBitOfNaN(SI) ||
isKnownNeverNaN(X, /*Depth=*/0,
IC.getSimplifyQuery().getWithInstruction(
cast<Instruction>(CondVal))))) {
@@ -2844,7 +2885,7 @@ static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
// Note: We require "nnan" for this fold because fcmp ignores the signbit
// of NAN, but IEEE-754 specifies the signbit of NAN values with
// fneg/fabs operations.
- if (!SI.hasNoSignedZeros() || !SI.hasNoNaNs())
+ if (!ignoreSignBitOfZero(SI) || !ignoreSignBitOfNaN(SI))
return nullptr;
if (Swap)
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index f449d4b8e6b37..ab4376bf78a67 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -1276,3 +1276,182 @@ define <2 x float> @test_select_neg_negx_x_wrong_type(<2 x float> %value) {
%value.addr.0.i = select i1 %a1, <2 x float> %fneg.i, <2 x float> %value
ret <2 x float> %value.addr.0.i
}
+
+define i1 @test_fabs_used_by_fcmp(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fcmp(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %cmp2 = fcmp olt float %sel, %y
+ ret i1 %cmp2
+}
+
+define float @test_fabs_used_by_fpop_nnan_nsz(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fpop_nnan_nsz(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[ADD:%.*]] = fadd nnan nsz float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %add = fadd nnan nsz float %sel, %y
+ ret float %add
+}
+
+define i1 @test_fabs_fsub_used_by_fcmp(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_fsub_used_by_fcmp(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp = fcmp ogt float %x, 0.000000e+00
+ %neg = fsub float 0.000000e+00, %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %cmp2 = fcmp olt float %sel, %y
+ ret i1 %cmp2
+}
+
+define float @test_fabs_fsub_used_by_fpop_nnan(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_fsub_used_by_fpop_nnan(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[ADD:%.*]] = fadd nnan float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %cmp = fcmp ogt float %x, 0.000000e+00
+ %neg = fsub float 0.000000e+00, %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %add = fadd nnan float %sel, %y
+ ret float %add
+}
+
+; TODO: fadd ignores the sign bit of NaN.
+define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fpop_nsz(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd nsz float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %add = fadd nsz float %sel, %y
+ ret float %add
+}
+
+; TODO: copysign ignores the sign bit of NaN magnitude.
+define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fcopysign_mag(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X1:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X1]]
+; CHECK-NEXT: [[X:%.*]] = select i1 [[CMP]], float [[X1]], float [[NEG]]
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %copysign = call float @llvm.copysign.f32(float %sel, float %y)
+ ret float %copysign
+}
+
+
+; Negative tests
+
+define float @test_fabs_used_by_fpop_nnan(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fpop_nnan(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd nnan float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: ret float [[ADD]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %add = fadd nnan float %sel, %y
+ ret float %add
+}
+
+define i1 @test_fabs_used_by_fcmp_multiuse(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fcmp_multiuse(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp olt float [[SEL]], [[Y:%.*]]
+; CHECK-NEXT: call void @use(float [[SEL]])
+; CHECK-NEXT: ret i1 [[CMP2]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %cmp2 = fcmp olt float %sel, %y
+ call void @use(float %sel)
+ ret i1 %cmp2
+}
+
+define float @test_fabs_used_by_fcopysign_sign(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_fcopysign_sign(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[Y:%.*]], float [[SEL]])
+; CHECK-NEXT: ret float [[COPYSIGN]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %copysign = call float @llvm.copysign.f32(float %y, float %sel)
+ ret float %copysign
+}
+
+define float @test_fabs_used_by_maxnum(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_used_by_maxnum(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[Y:%.*]], float [[SEL]])
+; CHECK-NEXT: ret float [[MAX]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %max = call float @llvm.maxnum.f32(float %y, float %sel)
+ ret float %max
+}
+
+define float @test_fabs_used_by_canonicalize(float %x) {
+; CHECK-LABEL: @test_fabs_used_by_canonicalize(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[CANON:%.*]] = call float @llvm.canonicalize.f32(float [[SEL]])
+; CHECK-NEXT: ret float [[CANON]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %canon = call float @llvm.canonicalize.f32(float %sel)
+ ret float %canon
+}
+
+define float @test_fabs_used_by_select(float %x, i1 %cond) {
+; CHECK-LABEL: @test_fabs_used_by_select(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[SEL2:%.*]] = select i1 [[COND:%.*]], float [[SEL]], float 0.000000e+00
+; CHECK-NEXT: ret float [[SEL2]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %sel2 = select i1 %cond, float %sel, float 0.000000e+00
+ ret float %sel2
+}
diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll
index 755beff9bf77a..a9d1b9a4ab837 100644
--- a/llvm/test/Transforms/InstCombine/fneg.ll
+++ b/llvm/test/Transforms/InstCombine/fneg.ll
@@ -709,7 +709,7 @@ define float @select_common_op_fneg_false(float %x, i1 %b) {
define float @fabs(float %a) {
; CHECK-LABEL: @fabs(
-; CHECK-NEXT: [[FNEG1:%.*]] = call nnan ninf nsz float @llvm.fabs.f32(float [[A:%.*]])
+; CHECK-NEXT: [[FNEG1:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
; CHECK-NEXT: ret float [[FNEG1]]
;
%fneg = fneg float %a
@@ -721,7 +721,7 @@ define float @fabs(float %a) {
define float @fnabs(float %a) {
; CHECK-LABEL: @fnabs(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
; CHECK-NEXT: [[FNEG1:%.*]] = fneg fast float [[TMP1]]
; CHECK-NEXT: ret float [[FNEG1]]
;
@@ -734,7 +734,7 @@ define float @fnabs(float %a) {
define float @fnabs_1(float %a) {
; CHECK-LABEL: @fnabs_1(
-; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.fabs.f32(float [[A:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]])
; CHECK-NEXT: [[FNEG1:%.*]] = fneg fast float [[TMP1]]
; CHECK-NEXT: ret float [[FNEG1]]
;
More information about the llvm-commits
mailing list