[llvm] [ValueTracking] Merge `cannotBeOrderedLessThanZeroImpl` into `computeKnownFPClass` (PR #76360)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 25 07:42:36 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Yingwei Zheng (dtcxzyw)
<details>
<summary>Changes</summary>
This patch merges the logic of `cannotBeOrderedLessThanZeroImpl` into `computeKnownFPClass` to improve the signbit inference.
---
Patch is 23.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76360.diff
7 Files Affected:
- (modified) llvm/include/llvm/Analysis/ValueTracking.h (+24-9)
- (modified) llvm/lib/Analysis/InstructionSimplify.cpp (+5-4)
- (modified) llvm/lib/Analysis/ValueTracking.cpp (+39-207)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+13-13)
- (modified) llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll (+8-4)
- (modified) llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll (+2-3)
- (modified) llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll (+2-6)
``````````diff
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index baa16306ebf5df..ec08c56a9fb62a 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -332,6 +332,12 @@ struct KnownFPClass {
void knownNot(FPClassTest RuleOut) {
KnownFPClasses = KnownFPClasses & ~RuleOut;
+ if (isKnownNever(fcNan) && !SignBit) {
+ if (isKnownNever(OrderedLessThanZeroMask | fcNegZero))
+ SignBit = false;
+ else if (isKnownNever(OrderedGreaterThanZeroMask | fcPosZero))
+ SignBit = true;
+ }
}
void fneg() {
@@ -367,6 +373,12 @@ struct KnownFPClass {
SignBit = false;
}
+ /// Assume the sign bit is one.
+ void signBitMustBeOne() {
+ KnownFPClasses &= (fcNegative | fcNan);
+ SignBit = true;
+ }
+
void copysign(const KnownFPClass &Sign) {
// Don't know anything about the sign of the source. Expand the possible set
// to its opposite sign pair.
@@ -553,15 +565,18 @@ inline bool isKnownNeverNaN(const Value *V, const DataLayout &DL,
return Known.isKnownNeverNaN();
}
-/// Return true if we can prove that the specified FP value's sign bit is 0.
-///
-/// NaN --> true/false (depending on the NaN's sign bit)
-/// +0 --> true
-/// -0 --> false
-/// x > +0 --> true
-/// x < -0 --> false
-bool SignBitMustBeZero(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI);
+/// Return false if we can prove that the specified FP value's sign bit is 0.
+/// Return true if we can prove that the specified FP value's sign bit is 1.
+/// Otherwise return std::nullopt.
+inline std::optional<bool> computeKnownFPSignBit(
+ const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr, unsigned Depth = 0,
+ AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr, bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcAllFlags, Depth, TLI, AC,
+ CtxI, DT, UseInstrInfo);
+ return Known.SignBit;
+}
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is true for all i8
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 5beac5547d65e0..8530c6a757586f 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5740,9 +5740,9 @@ static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
return ConstantFP::getZero(Op0->getType());
// +normal number * (-)0.0 --> (-)0.0
- if (isKnownNeverInfOrNaN(Op0, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) &&
- // TODO: Check SignBit from computeKnownFPClass when it's more complete.
- SignBitMustBeZero(Op0, Q.DL, Q.TLI))
+ KnownFPClass Known = computeKnownFPClass(
+ Op0, FMF, Q.DL, fcInf | fcNan, /*Depth*/ 0, Q.TLI, Q.AC, Q.CxtI, Q.DT);
+ if (Known.SignBit == false && Known.isKnownNever(fcInf | fcNan))
return Op1;
}
@@ -6195,7 +6195,8 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
Value *X;
switch (IID) {
case Intrinsic::fabs:
- if (SignBitMustBeZero(Op0, Q.DL, Q.TLI))
+ if (computeKnownFPSignBit(Op0, Q.DL, Q.TLI, /*Depth*/ 0, Q.AC, Q.CxtI,
+ Q.DT) == false)
return Op0;
break;
case Intrinsic::bswap:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index cac2602d455f9d..60ff4d128d9b05 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3737,205 +3737,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
return Intrinsic::not_intrinsic;
}
-/// Deprecated, use computeKnownFPClass instead.
-///
-/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
-/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
-/// bit despite comparing equal.
-static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool SignBitOnly, unsigned Depth) {
- // TODO: This function does not do the right thing when SignBitOnly is true
- // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
- // which flips the sign bits of NaNs. See
- // https://llvm.org/bugs/show_bug.cgi?id=31702.
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- return !CFP->getValueAPF().isNegative() ||
- (!SignBitOnly && CFP->getValueAPF().isZero());
- }
-
- // Handle vector of constants.
- if (auto *CV = dyn_cast<Constant>(V)) {
- if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) {
- unsigned NumElts = CVFVTy->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
- if (!CFP)
- return false;
- if (CFP->getValueAPF().isNegative() &&
- (SignBitOnly || !CFP->getValueAPF().isZero()))
- return false;
- }
-
- // All non-negative ConstantFPs.
- return true;
- }
- }
-
- if (Depth == MaxAnalysisRecursionDepth)
- return false;
-
- const Operator *I = dyn_cast<Operator>(V);
- if (!I)
- return false;
-
- switch (I->getOpcode()) {
- default:
- break;
- // Unsigned integers are always nonnegative.
- case Instruction::UIToFP:
- return true;
- case Instruction::FDiv:
- // X / X is always exactly 1.0 or a NaN.
- if (I->getOperand(0) == I->getOperand(1) &&
- (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
- return true;
-
- // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN).
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- /*SignBitOnly*/ true, Depth + 1);
- case Instruction::FMul:
- // X * X is always non-negative or a NaN.
- if (I->getOperand(0) == I->getOperand(1) &&
- (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
- return true;
-
- [[fallthrough]];
- case Instruction::FAdd:
- case Instruction::FRem:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::Select:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::FPExt:
- case Instruction::FPTrunc:
- // Widening/narrowing never change sign.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::ExtractElement:
- // Look through extract element. At the moment we keep this simple and skip
- // tracking the specific element. But at least we might find information
- // valid for all elements of the vector.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::Call:
- const auto *CI = cast<CallInst>(I);
- Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI);
- switch (IID) {
- default:
- break;
- case Intrinsic::canonicalize:
- case Intrinsic::arithmetic_fence:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::roundeven:
- case Intrinsic::fptrunc_round:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
- case Intrinsic::maxnum: {
- Value *V0 = I->getOperand(0), *V1 = I->getOperand(1);
- auto isPositiveNum = [&](Value *V) {
- if (SignBitOnly) {
- // With SignBitOnly, this is tricky because the result of
- // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is
- // a constant strictly greater than 0.0.
- const APFloat *C;
- return match(V, m_APFloat(C)) &&
- *C > APFloat::getZero(C->getSemantics());
- }
-
- // -0.0 compares equal to 0.0, so if this operand is at least -0.0,
- // maxnum can't be ordered-less-than-zero.
- return isKnownNeverNaN(V, DL, TLI) &&
- cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1);
- };
-
- // TODO: This could be improved. We could also check that neither operand
- // has its sign bit set (and at least 1 is not-NAN?).
- return isPositiveNum(V0) || isPositiveNum(V1);
- }
-
- case Intrinsic::maximum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) ||
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1);
- case Intrinsic::minnum:
- case Intrinsic::minimum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1);
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::fabs:
- return true;
- case Intrinsic::copysign:
- // Only the sign operand matters.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true,
- Depth + 1);
- case Intrinsic::sqrt:
- // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
- if (!SignBitOnly)
- return true;
- return CI->hasNoNaNs() &&
- (CI->hasNoSignedZeros() ||
- cannotBeNegativeZero(CI->getOperand(0), DL, TLI));
-
- case Intrinsic::powi:
- if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // powi(x,n) is non-negative if n is even.
- if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
- return true;
- }
- // TODO: This is not correct. Given that exp is an integer, here are the
- // ways that pow can return a negative value:
- //
- // pow(x, exp) --> negative if exp is odd and x is negative.
- // pow(-0, exp) --> -inf if exp is negative odd.
- // pow(-0, exp) --> -0 if exp is positive odd.
- // pow(-inf, exp) --> -0 if exp is negative odd.
- // pow(-inf, exp) --> -inf if exp is positive odd.
- //
- // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
- // but we must return false if x == -0. Unfortunately we do not currently
- // have a way of expressing this constraint. See details in
- // https://llvm.org/bugs/show_bug.cgi?id=31702.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
-
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- // x*x+y is non-negative if y is non-negative.
- return I->getOperand(0) == I->getOperand(1) &&
- (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI,
- SignBitOnly, Depth + 1);
- }
- break;
- }
- return false;
-}
-
-bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- // FIXME: Use computeKnownFPClass and pass all arguments
- return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0);
-}
-
/// Return true if it's possible to assume IEEE treatment of input denormals in
/// \p F for \p Val.
static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
@@ -4340,7 +4141,6 @@ static void computeKnownFPClassForFPTrunc(const Operator *Op,
// Infinity needs a range check.
}
-// TODO: Merge implementation of cannotBeOrderedLessThanZero into here.
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
FPClassTest InterestedClasses, KnownFPClass &Known,
unsigned Depth, const SimplifyQuery &Q) {
@@ -4365,6 +4165,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
const Constant *CV = dyn_cast<Constant>(V);
if (VFVTy && CV) {
Known.KnownFPClasses = fcNone;
+ bool SignBitAllZero = true;
+ bool SignBitAllOne = true;
// For vectors, verify that each element is not NaN.
unsigned NumElts = VFVTy->getNumElements();
@@ -4382,10 +4184,15 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
- KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()};
- Known |= KnownElt;
+ const APFloat &C = CElt->getValueAPF();
+ Known.KnownFPClasses |= C.classify();
+ if (C.isNegative())
+ SignBitAllZero = false;
+ else
+ SignBitAllOne = false;
}
-
+ if (SignBitAllOne != SignBitAllZero)
+ Known.SignBit = SignBitAllOne;
return;
}
@@ -4521,7 +4328,6 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
KnownAddend, Depth + 1, Q);
- // TODO: Known sign bit with no nans
if (KnownAddend.cannotBeOrderedLessThanZero())
Known.knownNot(fcNegative);
break;
@@ -4555,7 +4361,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
(F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) {
Known.knownNot(fcNegZero);
if (KnownSrc.isKnownNeverNaN())
- Known.SignBit = false;
+ Known.signBitMustBeZero();
}
break;
@@ -4625,7 +4431,6 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// subtargets on AMDGPU the min/max instructions would not flush the
// output and return the original value.
//
- // TODO: This could be refined based on the sign
if ((Known.KnownFPClasses & fcZero) != fcNone &&
!Known.isKnownNeverSubnormal()) {
const Function *Parent = II->getFunction();
@@ -4638,6 +4443,26 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
Known.KnownFPClasses |= fcZero;
}
+ if (Known.isKnownNeverNaN()) {
+ if (KnownLHS.SignBit && KnownRHS.SignBit &&
+ *KnownLHS.SignBit == *KnownRHS.SignBit) {
+ if (*KnownLHS.SignBit)
+ Known.signBitMustBeOne();
+ else
+ Known.signBitMustBeZero();
+ } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
+ ((KnownLHS.isKnownNeverNegZero() ||
+ KnownRHS.isKnownNeverPosZero()) &&
+ (KnownLHS.isKnownNeverPosZero() ||
+ KnownRHS.isKnownNeverNegZero()))) {
+ if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
+ (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
+ Known.signBitMustBeZero();
+ else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
+ (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
+ Known.signBitMustBeOne();
+ }
+ }
break;
}
case Intrinsic::canonicalize: {
@@ -4737,7 +4562,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
KnownSrc, Depth + 1, Q);
if (KnownSrc.isKnownNeverNaN()) {
Known.knownNot(fcNan);
- Known.SignBit = false;
+ Known.signBitMustBeZero();
}
break;
@@ -4987,6 +4812,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
if (!KnownLHS.isKnownNeverNaN())
break;
+ if (KnownLHS.SignBit && KnownRHS.SignBit) {
+ if (*KnownLHS.SignBit == *KnownRHS.SignBit)
+ Known.signBitMustBeZero();
+ else
+ Known.signBitMustBeOne();
+ }
+
// If 0 * +/-inf produces NaN.
if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
Known.knownNot(fcNan);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3b7fe7fa226607..232913063d49e2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2408,19 +2408,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
case Intrinsic::copysign: {
Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
- if (SignBitMustBeZero(Sign, DL, &TLI)) {
- // If we know that the sign argument is positive, reduce to FABS:
- // copysign Mag, +Sign --> fabs Mag
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
- return replaceInstUsesWith(*II, Fabs);
- }
- // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
- const APFloat *C;
- if (match(Sign, m_APFloat(C)) && C->isNegative()) {
- // If we know that the sign argument is negative, reduce to FNABS:
- // copysign Mag, -Sign --> fneg (fabs Mag)
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
- return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
+ if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
+ Sign, getDataLayout(), &TLI, /*Depth*/ 0, &AC, II, &DT)) {
+ if (*KnownSignBit) {
+ // If we know that the sign argument is negative, reduce to FNABS:
+ // copysign Mag, -Sign --> fneg (fabs Mag)
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
+ return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
+ } else {
+ // If we know that the sign argument is positive, reduce to FABS:
+ // copysign Mag, +Sign --> fabs Mag
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
+ return replaceInstUsesWith(*II, Fabs);
+ }
}
// Propagate sign argument through nested calls:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
index 0a18279a57ef1c..e6cb00273a2739 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
@@ -203,7 +203,8 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB16_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vmaxnma.f32 q0, q1
+; CHECK-NEXT: vabs.f32 q1, q1
+; CHECK-NEXT: vmaxnm.f32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB16_2
; CHECK-NEXT: .LBB16_3:
; CHECK-NEXT: vldr s4, .LCPI16_0
@@ -253,7 +254,8 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB17_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vmaxnma.f32 q0, q1
+; CHECK-NEXT: vabs.f32 q1, q1
+; CHECK-NEXT: vmaxnm.f32 q0, q1, q0
; CHECK-NEXT: le lr, .LBB17_2
; CHECK-NEXT: .LBB17_3:
; CHECK-NEXT: vldr s4, .LCPI17_0
@@ -395,7 +397,8 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB20_2: @ =>This Inner Loop Heade...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76360
More information about the llvm-commits mailing list