[llvm] [InstCombine] Generalize ignoreSignBitOfZero/NaN to handle more cases (PR #141015)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 00:41:46 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
This patch was originally part of https://github.com/llvm/llvm-project/pull/139861. It generalizes `ignoreSignBitOfZero/NaN` to handle more instructions/intrinsics.
BTW, I find it mitigates performance regressions caused by https://github.com/llvm/llvm-project/pull/141010 (IR diff https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2365/files). We don't need to propagate FMF from fcmp into select, since we can infer demanded properties from the user of select.
---
Full diff: https://github.com/llvm/llvm-project/pull/141015.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+91-13)
- (modified) llvm/test/Transforms/InstCombine/fabs.ll (+105-9)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2ef233bc25d72..13cbf7b215557 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2783,15 +2783,41 @@ static bool ignoreSignBitOfZero(Instruction &I) {
if (!I.hasOneUse())
return false;
Instruction *User = I.user_back();
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
+ if (FPOp->hasNoSignedZeros())
+ return true;
+ }
- // fcmp treats both positive and negative zero as equal.
- if (User->getOpcode() == Instruction::FCmp)
+ switch (User->getOpcode()) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
return true;
-
- if (auto *FPOp = dyn_cast<FPMathOperator>(User))
- return FPOp->hasNoSignedZeros();
-
- return false;
+ case Instruction::FCmp:
+ // fcmp treats both positive and negative zero as equal.
+ return true;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(User)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ return true;
+ case Intrinsic::copysign:
+ return II->getArgOperand(0) == &I;
+ case Intrinsic::is_fpclass:
+ case Intrinsic::vp_is_fpclass: {
+ auto Test =
+ static_cast<FPClassTest>(
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue()) &
+ FPClassTest::fcZero;
+ return Test == FPClassTest::fcZero || Test == FPClassTest::fcNone;
+ }
+ default:
+ return false;
+ }
+ }
+ return false;
+ default:
+ return false;
+ }
}
/// Return true if the sign bit of result can be ignored when the result is NaN.
@@ -2803,15 +2829,67 @@ static bool ignoreSignBitOfNaN(Instruction &I) {
if (!I.hasOneUse())
return false;
Instruction *User = I.user_back();
+ if (auto *FPOp = dyn_cast<FPMathOperator>(User)) {
+ if (FPOp->hasNoNaNs())
+ return true;
+ }
- // fcmp ignores the sign bit of NaN.
- if (User->getOpcode() == Instruction::FCmp)
+ switch (User->getOpcode()) {
+ case Instruction::FPToSI:
+ case Instruction::FPToUI:
return true;
+ // Proper FP math operations ignore the sign bit of NaN.
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FCmp:
+ return true;
+ // Bitwise FP operations should preserve the sign bit of NaN.
+ case Instruction::FNeg:
+ case Instruction::Select:
+ case Instruction::PHI:
+ return false;
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ if (auto *II = dyn_cast<IntrinsicInst>(User)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::fabs:
+ return true;
+ case Intrinsic::copysign:
+ return II->getArgOperand(0) == &I;
+ // Other proper FP math intrinsics ignore the sign bit of NaN.
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximumnum:
+ case Intrinsic::minimumnum:
+ case Intrinsic::canonicalize:
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd:
+ case Intrinsic::sqrt:
+ case Intrinsic::pow:
+ case Intrinsic::powi:
+ case Intrinsic::fptoui_sat:
+ case Intrinsic::fptosi_sat:
+ case Intrinsic::is_fpclass:
+ return true;
+ default:
+ return false;
+ }
+ }
- if (auto *FPOp = dyn_cast<FPMathOperator>(User))
- return FPOp->hasNoNaNs();
-
- return false;
+ FPClassTest NoFPClass = cast<CallBase>(User)->getParamNoFPClass(
+ I.uses().begin()->getOperandNo());
+ return NoFPClass & FPClassTest::fcNan;
+ }
+ default:
+ return false;
+ }
}
// Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
diff --git a/llvm/test/Transforms/InstCombine/fabs.ll b/llvm/test/Transforms/InstCombine/fabs.ll
index ab4376bf78a67..b889f6c2c028c 100644
--- a/llvm/test/Transforms/InstCombine/fabs.ll
+++ b/llvm/test/Transforms/InstCombine/fabs.ll
@@ -1329,12 +1329,9 @@ define float @test_fabs_fsub_used_by_fpop_nnan(float %x, float %y) {
ret float %add
}
-; TODO: fadd ignores the sign bit of NaN.
define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
; CHECK-LABEL: @test_fabs_used_by_fpop_nsz(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
-; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: [[ADD:%.*]] = fadd nsz float [[SEL]], [[Y:%.*]]
; CHECK-NEXT: ret float [[ADD]]
;
@@ -1345,13 +1342,9 @@ define float @test_fabs_used_by_fpop_nsz(float %x, float %y) {
ret float %add
}
-; TODO: copysign ignores the sign bit of NaN magnitude.
define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
; CHECK-LABEL: @test_fabs_used_by_fcopysign_mag(
-; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X1:%.*]], 0.000000e+00
-; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X1]]
-; CHECK-NEXT: [[X:%.*]] = select i1 [[CMP]], float [[X1]], float [[NEG]]
-; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X]], float [[Y:%.*]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = call float @llvm.copysign.f32(float [[X:%.*]], float [[Y:%.*]])
; CHECK-NEXT: ret float [[COPYSIGN]]
;
%cmp = fcmp oge float %x, 0.000000e+00
@@ -1361,6 +1354,94 @@ define float @test_fabs_used_by_fcopysign_mag(float %x, float %y) {
ret float %copysign
}
+define float @test_fabs_nsz_used_by_canonicalize(float %x) {
+; CHECK-LABEL: @test_fabs_nsz_used_by_canonicalize(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[CANON:%.*]] = call float @llvm.canonicalize.f32(float [[SEL]])
+; CHECK-NEXT: ret float [[CANON]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ %canon = call float @llvm.canonicalize.f32(float %sel)
+ ret float %canon
+}
+
+define void @test_fabs_used_by_nofpclass_nan(float %x) {
+; CHECK-LABEL: @test_fabs_used_by_nofpclass_nan(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: call void @use(float nofpclass(nan) [[SEL]])
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ call void @use(float nofpclass(nan) %sel)
+ ret void
+}
+
+define i32 @test_fabs_used_fptosi(float %x) {
+; CHECK-LABEL: @test_fabs_used_fptosi(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[FPTOSI:%.*]] = fptosi float [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[FPTOSI]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %fptosi = fptosi float %sel to i32
+ ret i32 %fptosi
+}
+
+define i32 @test_fabs_used_fptoui(float %x) {
+; CHECK-LABEL: @test_fabs_used_fptoui(
+; CHECK-NEXT: [[SEL:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[FPTOSI:%.*]] = fptoui float [[SEL]] to i32
+; CHECK-NEXT: ret i32 [[FPTOSI]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %fptosi = fptoui float %sel to i32
+ ret i32 %fptosi
+}
+
+define float @test_fabs_nsz_used_by_maxnum(float %x, float %y) {
+; CHECK-LABEL: @test_fabs_nsz_used_by_maxnum(
+; CHECK-NEXT: [[SEL:%.*]] = call nsz float @llvm.fabs.f32(float [[X:%.*]])
+; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float [[Y:%.*]], float [[SEL]])
+; CHECK-NEXT: ret float [[MAX]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select nsz i1 %cmp, float %x, float %neg
+ %max = call float @llvm.maxnum.f32(float %y, float %sel)
+ ret float %max
+}
+
+define i1 @test_fabs_used_is_fpclass_pnorm_or_nan(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_pnorm_or_nan(
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 267)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 259)
+ ret i1 %is_fpclass
+}
+
+define i1 @test_fabs_used_is_fpclass_zero_or_pinf(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_zero_or_pinf(
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[X:%.*]], i32 612)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 608)
+ ret i1 %is_fpclass
+}
; Negative tests
@@ -1455,3 +1536,18 @@ define float @test_fabs_used_by_select(float %x, i1 %cond) {
%sel2 = select i1 %cond, float %sel, float 0.000000e+00
ret float %sel2
}
+
+define i1 @test_fabs_used_is_fpclass_pzero(float %x) {
+; CHECK-LABEL: @test_fabs_used_is_fpclass_pzero(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT: [[NEG:%.*]] = fneg float [[X]]
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[X]], float [[NEG]]
+; CHECK-NEXT: [[IS_FPCLASS:%.*]] = call i1 @llvm.is.fpclass.f32(float [[SEL]], i32 64)
+; CHECK-NEXT: ret i1 [[IS_FPCLASS]]
+;
+ %cmp = fcmp oge float %x, 0.000000e+00
+ %neg = fneg float %x
+ %sel = select i1 %cmp, float %x, float %neg
+ %is_fpclass = call i1 @llvm.is.fpclass.f32(float %sel, i32 64)
+ ret i1 %is_fpclass
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/141015
More information about the llvm-commits
mailing list