[llvm] [ValueTracking] Merge `cannotBeOrderedLessThanZeroImpl` into `computeKnownFPClass` (PR #76360)
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 7 05:07:47 PST 2024
https://github.com/dtcxzyw updated https://github.com/llvm/llvm-project/pull/76360
>From a4098bc544e6f80042deeb354b1c5e5128d3dc08 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 7 Jan 2024 21:00:23 +0800
Subject: [PATCH 1/2] [ValueTracking] Add additional tests for `llvm.powi`.
NFC.
---
llvm/test/Analysis/ValueTracking/powi-nneg.ll | 51 +++++++++++++++++++
1 file changed, 51 insertions(+)
create mode 100644 llvm/test/Analysis/ValueTracking/powi-nneg.ll
diff --git a/llvm/test/Analysis/ValueTracking/powi-nneg.ll b/llvm/test/Analysis/ValueTracking/powi-nneg.ll
new file mode 100644
index 00000000000000..8a46613a0c9ee3
--- /dev/null
+++ b/llvm/test/Analysis/ValueTracking/powi-nneg.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -passes=instsimplify -S < %s | FileCheck %s
+
+define i1 @test_powi_even_exp(float %x) {
+; CHECK-LABEL: define i1 @test_powi_even_exp(
+; CHECK-SAME: float [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %y = call float @llvm.powi.f32(float %x, i32 8)
+ %cmp = fcmp olt float %y, 0.0
+ ret i1 %cmp
+}
+
+define i1 @test_powi_base(float nofpclass(ninf nnorm nsub nzero) %x, i32 %e) {
+; CHECK-LABEL: define i1 @test_powi_base(
+; CHECK-SAME: float nofpclass(ninf nzero nsub nnorm) [[X:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %y = call float @llvm.powi.f32(float %x, i32 %e)
+ %cmp = fcmp olt float %y, 0.0
+ ret i1 %cmp
+}
+
+define i1 @test_powi_base_without_nsz(float nofpclass(ninf nnorm nsub) %x, i32 %e) {
+; CHECK-LABEL: define i1 @test_powi_base_without_nsz(
+; CHECK-SAME: float nofpclass(ninf nsub nnorm) [[X:%.*]], i32 [[E:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = call float @llvm.powi.f32.i32(float [[X]], i32 [[E]])
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[Y]], 0.000000e+00
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %y = call float @llvm.powi.f32(float %x, i32 %e)
+ %cmp = fcmp olt float %y, 0.0
+ ret i1 %cmp
+}
+
+define float @pr31702(float %x, i32 %n) {
+; CHECK-LABEL: define float @pr31702(
+; CHECK-SAME: float [[X:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CALL1:%.*]] = call float @llvm.powi.f32.i32(float -0.000000e+00, i32 [[N]])
+; CHECK-NEXT: [[CALL2:%.*]] = call float @llvm.fabs.f32(float [[CALL1]])
+; CHECK-NEXT: ret float [[CALL2]]
+;
+entry:
+ %call1 = call float @llvm.powi.f32(float -0.0, i32 %n)
+ %call2 = call float @llvm.fabs.f32(float %call1)
+ ret float %call2
+}
+
+declare float @llvm.powi.f32(float, i32)
+declare float @llvm.fabs.f32(float)
>From 82d1d5005236ff229defccd76fb564e009453ec2 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333 at gmail.com>
Date: Sun, 7 Jan 2024 21:06:54 +0800
Subject: [PATCH 2/2] [ValueTracking] Merge `cannotBeOrderedLessThanZeroImpl`
into `computeKnownFPClass`
---
llvm/include/llvm/Analysis/ValueTracking.h | 33 ++-
llvm/lib/Analysis/InstructionSimplify.cpp | 9 +-
llvm/lib/Analysis/ValueTracking.cpp | 246 +++---------------
.../InstCombine/InstCombineCalls.cpp | 18 +-
.../CodeGen/Thumb2/mve-vmaxnma-commute.ll | 12 +-
.../InstCombine/copysign-fneg-fabs.ll | 5 +-
.../floating-point-arithmetic-strictfp.ll | 8 +-
7 files changed, 89 insertions(+), 242 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 7360edfce1f39a..90075cb88c3a09 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -332,6 +332,12 @@ struct KnownFPClass {
void knownNot(FPClassTest RuleOut) {
KnownFPClasses = KnownFPClasses & ~RuleOut;
+ if (isKnownNever(fcNan) && !SignBit) {
+ if (isKnownNever(OrderedLessThanZeroMask | fcNegZero))
+ SignBit = false;
+ else if (isKnownNever(OrderedGreaterThanZeroMask | fcPosZero))
+ SignBit = true;
+ }
}
void fneg() {
@@ -367,6 +373,12 @@ struct KnownFPClass {
SignBit = false;
}
+ /// Assume the sign bit is one.
+ void signBitMustBeOne() {
+ KnownFPClasses &= (fcNegative | fcNan);
+ SignBit = true;
+ }
+
void copysign(const KnownFPClass &Sign) {
// Don't know anything about the sign of the source. Expand the possible set
// to its opposite sign pair.
@@ -553,15 +565,18 @@ inline bool isKnownNeverNaN(const Value *V, const DataLayout &DL,
return Known.isKnownNeverNaN();
}
-/// Return true if we can prove that the specified FP value's sign bit is 0.
-///
-/// NaN --> true/false (depending on the NaN's sign bit)
-/// +0 --> true
-/// -0 --> false
-/// x > +0 --> true
-/// x < -0 --> false
-bool SignBitMustBeZero(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI);
+/// Return false if we can prove that the specified FP value's sign bit is 0.
+/// Return true if we can prove that the specified FP value's sign bit is 1.
+/// Otherwise return std::nullopt.
+inline std::optional<bool> computeKnownFPSignBit(
+ const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr, unsigned Depth = 0,
+ AssumptionCache *AC = nullptr, const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr, bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcAllFlags, Depth, TLI, AC,
+ CtxI, DT, UseInstrInfo);
+ return Known.SignBit;
+}
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is true for all i8
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index 241bdd81b75a97..02eef4b5ee2406 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -5762,9 +5762,9 @@ static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
return ConstantFP::getZero(Op0->getType());
// +normal number * (-)0.0 --> (-)0.0
- if (isKnownNeverInfOrNaN(Op0, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) &&
- // TODO: Check SignBit from computeKnownFPClass when it's more complete.
- SignBitMustBeZero(Op0, Q.DL, Q.TLI))
+ KnownFPClass Known = computeKnownFPClass(
+ Op0, FMF, Q.DL, fcInf | fcNan, /*Depth=*/0, Q.TLI, Q.AC, Q.CxtI, Q.DT);
+ if (Known.SignBit == false && Known.isKnownNever(fcInf | fcNan))
return Op1;
}
@@ -6217,7 +6217,8 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
Value *X;
switch (IID) {
case Intrinsic::fabs:
- if (SignBitMustBeZero(Op0, Q.DL, Q.TLI))
+ if (computeKnownFPSignBit(Op0, Q.DL, Q.TLI, /*Depth=*/0, Q.AC, Q.CxtI,
+ Q.DT) == false)
return Op0;
break;
case Intrinsic::bswap:
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 940ae9eb7ee29a..7c49ddd1754552 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -3704,205 +3704,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
return Intrinsic::not_intrinsic;
}
-/// Deprecated, use computeKnownFPClass instead.
-///
-/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
-/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
-/// bit despite comparing equal.
-static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- bool SignBitOnly, unsigned Depth) {
- // TODO: This function does not do the right thing when SignBitOnly is true
- // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
- // which flips the sign bits of NaNs. See
- // https://llvm.org/bugs/show_bug.cgi?id=31702.
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
- return !CFP->getValueAPF().isNegative() ||
- (!SignBitOnly && CFP->getValueAPF().isZero());
- }
-
- // Handle vector of constants.
- if (auto *CV = dyn_cast<Constant>(V)) {
- if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) {
- unsigned NumElts = CVFVTy->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
- if (!CFP)
- return false;
- if (CFP->getValueAPF().isNegative() &&
- (SignBitOnly || !CFP->getValueAPF().isZero()))
- return false;
- }
-
- // All non-negative ConstantFPs.
- return true;
- }
- }
-
- if (Depth == MaxAnalysisRecursionDepth)
- return false;
-
- const Operator *I = dyn_cast<Operator>(V);
- if (!I)
- return false;
-
- switch (I->getOpcode()) {
- default:
- break;
- // Unsigned integers are always nonnegative.
- case Instruction::UIToFP:
- return true;
- case Instruction::FDiv:
- // X / X is always exactly 1.0 or a NaN.
- if (I->getOperand(0) == I->getOperand(1) &&
- (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
- return true;
-
- // Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN).
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- /*SignBitOnly*/ true, Depth + 1);
- case Instruction::FMul:
- // X * X is always non-negative or a NaN.
- if (I->getOperand(0) == I->getOperand(1) &&
- (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
- return true;
-
- [[fallthrough]];
- case Instruction::FAdd:
- case Instruction::FRem:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::Select:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::FPExt:
- case Instruction::FPTrunc:
- // Widening/narrowing never change sign.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::ExtractElement:
- // Look through extract element. At the moment we keep this simple and skip
- // tracking the specific element. But at least we might find information
- // valid for all elements of the vector.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
- case Instruction::Call:
- const auto *CI = cast<CallInst>(I);
- Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI);
- switch (IID) {
- default:
- break;
- case Intrinsic::canonicalize:
- case Intrinsic::arithmetic_fence:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::roundeven:
- case Intrinsic::fptrunc_round:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
- case Intrinsic::maxnum: {
- Value *V0 = I->getOperand(0), *V1 = I->getOperand(1);
- auto isPositiveNum = [&](Value *V) {
- if (SignBitOnly) {
- // With SignBitOnly, this is tricky because the result of
- // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is
- // a constant strictly greater than 0.0.
- const APFloat *C;
- return match(V, m_APFloat(C)) &&
- *C > APFloat::getZero(C->getSemantics());
- }
-
- // -0.0 compares equal to 0.0, so if this operand is at least -0.0,
- // maxnum can't be ordered-less-than-zero.
- return isKnownNeverNaN(V, DL, TLI) &&
- cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1);
- };
-
- // TODO: This could be improved. We could also check that neither operand
- // has its sign bit set (and at least 1 is not-NAN?).
- return isPositiveNum(V0) || isPositiveNum(V1);
- }
-
- case Intrinsic::maximum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) ||
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1);
- case Intrinsic::minnum:
- case Intrinsic::minimum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
- SignBitOnly, Depth + 1);
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::fabs:
- return true;
- case Intrinsic::copysign:
- // Only the sign operand matters.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true,
- Depth + 1);
- case Intrinsic::sqrt:
- // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
- if (!SignBitOnly)
- return true;
- return CI->hasNoNaNs() &&
- (CI->hasNoSignedZeros() ||
- cannotBeNegativeZero(CI->getOperand(0), DL, TLI));
-
- case Intrinsic::powi:
- if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
- // powi(x,n) is non-negative if n is even.
- if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
- return true;
- }
- // TODO: This is not correct. Given that exp is an integer, here are the
- // ways that pow can return a negative value:
- //
- // pow(x, exp) --> negative if exp is odd and x is negative.
- // pow(-0, exp) --> -inf if exp is negative odd.
- // pow(-0, exp) --> -0 if exp is positive odd.
- // pow(-inf, exp) --> -0 if exp is negative odd.
- // pow(-inf, exp) --> -inf if exp is positive odd.
- //
- // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
- // but we must return false if x == -0. Unfortunately we do not currently
- // have a way of expressing this constraint. See details in
- // https://llvm.org/bugs/show_bug.cgi?id=31702.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
- SignBitOnly, Depth + 1);
-
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- // x*x+y is non-negative if y is non-negative.
- return I->getOperand(0) == I->getOperand(1) &&
- (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI,
- SignBitOnly, Depth + 1);
- }
- break;
- }
- return false;
-}
-
-bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- // FIXME: Use computeKnownFPClass and pass all arguments
- return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0);
-}
-
/// Return true if it's possible to assume IEEE treatment of input denormals in
/// \p F for \p Val.
static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
@@ -4307,7 +4108,6 @@ static void computeKnownFPClassForFPTrunc(const Operator *Op,
// Infinity needs a range check.
}
-// TODO: Merge implementation of cannotBeOrderedLessThanZero into here.
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
FPClassTest InterestedClasses, KnownFPClass &Known,
unsigned Depth, const SimplifyQuery &Q) {
@@ -4332,6 +4132,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
const Constant *CV = dyn_cast<Constant>(V);
if (VFVTy && CV) {
Known.KnownFPClasses = fcNone;
+ bool SignBitAllZero = true;
+ bool SignBitAllOne = true;
// For vectors, verify that each element is not NaN.
unsigned NumElts = VFVTy->getNumElements();
@@ -4349,10 +4151,15 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
- KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()};
- Known |= KnownElt;
+ const APFloat &C = CElt->getValueAPF();
+ Known.KnownFPClasses |= C.classify();
+ if (C.isNegative())
+ SignBitAllZero = false;
+ else
+ SignBitAllOne = false;
}
-
+ if (SignBitAllOne != SignBitAllZero)
+ Known.SignBit = SignBitAllOne;
return;
}
@@ -4488,7 +4295,6 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
computeKnownFPClass(II->getArgOperand(2), DemandedElts, InterestedClasses,
KnownAddend, Depth + 1, Q);
- // TODO: Known sign bit with no nans
if (KnownAddend.cannotBeOrderedLessThanZero())
Known.knownNot(fcNegative);
break;
@@ -4522,7 +4328,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
(F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) {
Known.knownNot(fcNegZero);
if (KnownSrc.isKnownNeverNaN())
- Known.SignBit = false;
+ Known.signBitMustBeZero();
}
break;
@@ -4592,7 +4398,6 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// subtargets on AMDGPU the min/max instructions would not flush the
// output and return the original value.
//
- // TODO: This could be refined based on the sign
if ((Known.KnownFPClasses & fcZero) != fcNone &&
!Known.isKnownNeverSubnormal()) {
const Function *Parent = II->getFunction();
@@ -4605,6 +4410,26 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
Known.KnownFPClasses |= fcZero;
}
+ if (Known.isKnownNeverNaN()) {
+ if (KnownLHS.SignBit && KnownRHS.SignBit &&
+ *KnownLHS.SignBit == *KnownRHS.SignBit) {
+ if (*KnownLHS.SignBit)
+ Known.signBitMustBeOne();
+ else
+ Known.signBitMustBeZero();
+ } else if ((IID == Intrinsic::maximum || IID == Intrinsic::minimum) ||
+ ((KnownLHS.isKnownNeverNegZero() ||
+ KnownRHS.isKnownNeverPosZero()) &&
+ (KnownLHS.isKnownNeverPosZero() ||
+ KnownRHS.isKnownNeverNegZero()))) {
+ if ((IID == Intrinsic::maximum || IID == Intrinsic::maxnum) &&
+ (KnownLHS.SignBit == false || KnownRHS.SignBit == false))
+ Known.signBitMustBeZero();
+ else if ((IID == Intrinsic::minimum || IID == Intrinsic::minnum) &&
+ (KnownLHS.SignBit == true || KnownRHS.SignBit == true))
+ Known.signBitMustBeOne();
+ }
+ }
break;
}
case Intrinsic::canonicalize: {
@@ -4704,7 +4529,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
KnownSrc, Depth + 1, Q);
if (KnownSrc.isKnownNeverNaN()) {
Known.knownNot(fcNan);
- Known.SignBit = false;
+ Known.signBitMustBeZero();
}
break;
@@ -4954,6 +4779,13 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
if (!KnownLHS.isKnownNeverNaN())
break;
+ if (KnownLHS.SignBit && KnownRHS.SignBit) {
+ if (*KnownLHS.SignBit == *KnownRHS.SignBit)
+ Known.signBitMustBeZero();
+ else
+ Known.signBitMustBeOne();
+ }
+
// If 0 * +/-inf produces NaN.
if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
Known.knownNot(fcNan);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 40b48699f75851..0174b5237de837 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2406,20 +2406,20 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
case Intrinsic::copysign: {
Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
- if (SignBitMustBeZero(Sign, DL, &TLI)) {
+ if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
+ Sign, getDataLayout(), &TLI, /*Depth=*/0, &AC, II, &DT)) {
+ if (*KnownSignBit) {
+ // If we know that the sign argument is negative, reduce to FNABS:
+ // copysign Mag, -Sign --> fneg (fabs Mag)
+ Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
+ return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
+ }
+
// If we know that the sign argument is positive, reduce to FABS:
// copysign Mag, +Sign --> fabs Mag
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
return replaceInstUsesWith(*II, Fabs);
}
- // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
- const APFloat *C;
- if (match(Sign, m_APFloat(C)) && C->isNegative()) {
- // If we know that the sign argument is negative, reduce to FNABS:
- // copysign Mag, -Sign --> fneg (fabs Mag)
- Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
- return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
- }
// Propagate sign argument through nested calls:
// copysign Mag, (copysign ?, X) --> copysign Mag, X
diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
index 0a18279a57ef1c..e6cb00273a2739 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vmaxnma-commute.ll
@@ -203,7 +203,8 @@ define void @loop_absmax32(float* nocapture readonly %0, i32 %1, float* nocaptur
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB16_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vmaxnma.f32 q0, q1
+; CHECK-NEXT: vabs.f32 q1, q1
+; CHECK-NEXT: vmaxnm.f32 q0, q0, q1
; CHECK-NEXT: le lr, .LBB16_2
; CHECK-NEXT: .LBB16_3:
; CHECK-NEXT: vldr s4, .LCPI16_0
@@ -253,7 +254,8 @@ define void @loop_absmax32_c(float* nocapture readonly %0, i32 %1, float* nocapt
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB17_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #16
-; CHECK-NEXT: vmaxnma.f32 q0, q1
+; CHECK-NEXT: vabs.f32 q1, q1
+; CHECK-NEXT: vmaxnm.f32 q0, q1, q0
; CHECK-NEXT: le lr, .LBB17_2
; CHECK-NEXT: .LBB17_3:
; CHECK-NEXT: vldr s4, .LCPI17_0
@@ -395,7 +397,8 @@ define void @loop_absmax16(half* nocapture readonly %0, i32 %1, half* nocapture
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #8
-; CHECK-NEXT: vmaxnma.f16 q0, q1
+; CHECK-NEXT: vabs.f16 q1, q1
+; CHECK-NEXT: vmaxnm.f16 q0, q0, q1
; CHECK-NEXT: le lr, .LBB20_2
; CHECK-NEXT: .LBB20_3:
; CHECK-NEXT: vldr.16 s4, .LCPI20_0
@@ -445,7 +448,8 @@ define void @loop_absmax16_c(half* nocapture readonly %0, i32 %1, half* nocaptur
; CHECK-NEXT: vmov.i32 q0, #0x0
; CHECK-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vldrw.u32 q1, [r0], #8
-; CHECK-NEXT: vmaxnma.f16 q0, q1
+; CHECK-NEXT: vabs.f16 q1, q1
+; CHECK-NEXT: vmaxnm.f16 q0, q1, q0
; CHECK-NEXT: le lr, .LBB21_2
; CHECK-NEXT: .LBB21_3:
; CHECK-NEXT: vldr.16 s4, .LCPI21_0
diff --git a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll
index db839e86f25321..af939cf74399ab 100644
--- a/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll
+++ b/llvm/test/Transforms/InstCombine/copysign-fneg-fabs.ll
@@ -55,9 +55,8 @@ define half @copysign_fabs_y(half %x, half %y) {
define half @copysign_fneg_fabs_y(half %x, half %y) {
; CHECK-LABEL: @copysign_fneg_fabs_y(
-; CHECK-NEXT: [[FABS_Y:%.*]] = call half @llvm.fabs.f16(half [[Y:%.*]])
-; CHECK-NEXT: [[FNEG_FABS_Y:%.*]] = fneg half [[FABS_Y]]
-; CHECK-NEXT: [[COPYSIGN:%.*]] = call half @llvm.copysign.f16(half [[X:%.*]], half [[FNEG_FABS_Y]])
+; CHECK-NEXT: [[TMP1:%.*]] = call half @llvm.fabs.f16(half [[X:%.*]])
+; CHECK-NEXT: [[COPYSIGN:%.*]] = fneg half [[TMP1]]
; CHECK-NEXT: ret half [[COPYSIGN]]
;
%fabs.y = call half @llvm.fabs.f16(half %y)
diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll
index 7ee8a82ab5a2da..e4748a24029236 100644
--- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll
+++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll
@@ -249,25 +249,21 @@ define float @fabs_sqrt_nsz(float %a) #0 {
ret float %fabs
}
-; The fabs can be eliminated because we're nsz and nnan.
define float @fabs_sqrt_nnan_nsz(float %a) #0 {
; CHECK-LABEL: @fabs_sqrt_nnan_nsz(
; CHECK-NEXT: [[SQRT:%.*]] = call nnan nsz float @llvm.experimental.constrained.sqrt.f32(float [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
-; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SQRT]]) #[[ATTR0]]
-; CHECK-NEXT: ret float [[FABS]]
+; CHECK-NEXT: ret float [[SQRT]]
;
%sqrt = call nnan nsz float @llvm.experimental.constrained.sqrt.f32(float %a, metadata !"round.tonearest", metadata !"fpexcept.ignore")
%fabs = call float @llvm.fabs.f32(float %sqrt) #0
ret float %fabs
}
-; The second fabs can be eliminated because the operand to sqrt cannot be -0.
define float @fabs_sqrt_nnan_fabs(float %a) #0 {
; CHECK-LABEL: @fabs_sqrt_nnan_fabs(
; CHECK-NEXT: [[B:%.*]] = call float @llvm.fabs.f32(float [[A:%.*]]) #[[ATTR0]]
; CHECK-NEXT: [[SQRT:%.*]] = call nnan float @llvm.experimental.constrained.sqrt.f32(float [[B]], metadata !"round.tonearest", metadata !"fpexcept.ignore")
-; CHECK-NEXT: [[FABS:%.*]] = call float @llvm.fabs.f32(float [[SQRT]]) #[[ATTR0]]
-; CHECK-NEXT: ret float [[FABS]]
+; CHECK-NEXT: ret float [[SQRT]]
;
%b = call float @llvm.fabs.f32(float %a) #0
%sqrt = call nnan float @llvm.experimental.constrained.sqrt.f32(float %b, metadata !"round.tonearest", metadata !"fpexcept.ignore")
More information about the llvm-commits
mailing list