[llvm-branch-commits] [llvm] ValueTracking: Special case fmul by llvm.amdgcn.trig.preop (PR #183373)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 26 07:49:20 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/183373
>From 618e4f59e6ccdc663680253a9962f58abfb275e3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 25 Feb 2026 15:45:08 +0100
Subject: [PATCH 1/4] ValueTracking: Special case fmul by
llvm.amdgcn.trig.preop
This is another instance of the logic from #183159. If we know
one source is not-infinity, and the other source is less than or
equal to 1, this cannot overflow. Special case llvm.amdgcn.trig.preop,
as a substitute for proper range tracking. This almost enables pruning
edge case handling in trig function implementations, if not for the
recursion depth limit (but that's a problem for another day).
---
llvm/lib/Analysis/ValueTracking.cpp | 37 ++++--
.../AMDGPU/nofpclass-amdgcn-trig-preop.ll | 113 ++++++++++++++++++
2 files changed, 138 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 2684b41cd1e5d..5eb048d1fd8ff 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4965,6 +4965,12 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
}
}
+/// \return true if this is a floating point value that is known to have a
+/// magintude smaller than 1. i.e., fabs(X) <=1.0
+static bool isAbsoluteValueLessEqualOne(const Value *V) {
+ return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
+}
+
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
FPClassTest InterestedClasses, KnownFPClass &Known,
const SimplifyQuery &Q, unsigned Depth) {
@@ -5574,37 +5580,44 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
Op->getType()->getScalarType()->getFltSemantics())
: DenormalMode::getDynamic();
+ Value *LHS = Op->getOperand(0);
+ Value *RHS = Op->getOperand(1);
// X * X is always non-negative or a NaN.
// FIXME: Should check isGuaranteedNotToBeUndef
- if (Op->getOperand(0) == Op->getOperand(1)) {
+ if (LHS == RHS) {
KnownFPClass KnownSrc;
- computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownSrc,
- Q, Depth + 1);
+ computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownSrc, Q,
+ Depth + 1);
Known = KnownFPClass::square(KnownSrc, Mode);
break;
}
+ KnownFPClass KnownLHS, KnownRHS;
+
const APFloat *CRHS;
- if (match(Op->getOperand(1), m_APFloat(CRHS))) {
- KnownFPClass KnownLHS;
+ if (match(RHS, m_APFloat(CRHS))) {
computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS,
Q, Depth + 1);
-
Known = KnownFPClass::fmul(KnownLHS, *CRHS, Mode);
} else {
- KnownFPClass KnownLHS, KnownRHS;
-
- computeKnownFPClass(Op->getOperand(1), DemandedElts, fcAllFlags, KnownRHS,
- Q, Depth + 1);
+ computeKnownFPClass(RHS, DemandedElts, fcAllFlags, KnownRHS, Q,
+ Depth + 1);
// TODO: Improve accuracy in unfused FMA pattern. We can prove an
// additional not-nan if the addend is known-not negative infinity if the
// multiply is known-not infinity.
- computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS,
- Q, Depth + 1);
+ computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownLHS, Q,
+ Depth + 1);
Known = KnownFPClass::fmul(KnownLHS, KnownRHS, Mode);
}
+ /// Propagate no-infs if the other source is known smaller than one, such
+ /// that this cannot introduce overflow.
+ if (KnownLHS.isKnownNever(fcInf) && isAbsoluteValueLessEqualOne(RHS))
+ Known.knownNot(fcInf);
+ else if (KnownRHS.isKnownNever(fcInf) && isAbsoluteValueLessEqualOne(LHS))
+ Known.knownNot(fcInf);
+
break;
}
case Instruction::FDiv:
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll b/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll
index fe6e939664161..4cb217de2b1eb 100644
--- a/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll
@@ -10,3 +10,116 @@ define double @ret_trig_preop_f64(double %x, i32 %n) {
%ret = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
ret double %ret
}
+
+define double @ret_not_inf__fmul__trig_preop(double nofpclass(inf) %not.inf, double %x, i32 %n) {
+; CHECK-LABEL: define nofpclass(inf) double @ret_not_inf__fmul__trig_preop(
+; CHECK-SAME: double nofpclass(inf) [[NOT_INF:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[NOT_INF]], [[TRIG_PREOP]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %not.inf, %trig.preop
+ ret double %mul
+}
+
+define double @ret_trig_preop__fmul__not_inf(double nofpclass(inf) %not.inf, double %x, i32 %n) {
+; CHECK-LABEL: define nofpclass(inf) double @ret_trig_preop__fmul__not_inf(
+; CHECK-SAME: double nofpclass(inf) [[NOT_INF:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TRIG_PREOP]], [[NOT_INF]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %trig.preop, %not.inf
+ ret double %mul
+}
+
+define double @ret_not_nan__fmul__trig_preop(double nofpclass(nan) %not.nan, double %x, i32 %n) {
+; CHECK-LABEL: define double @ret_not_nan__fmul__trig_preop(
+; CHECK-SAME: double nofpclass(nan) [[NOT_NAN:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[NOT_NAN]], [[TRIG_PREOP]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %not.nan, %trig.preop
+ ret double %mul
+}
+
+define double @ret_trig_preop__fmul__not_nan(double nofpclass(nan) %not.nan, double %x, i32 %n) {
+; CHECK-LABEL: define double @ret_trig_preop__fmul__not_nan(
+; CHECK-SAME: double nofpclass(nan) [[NOT_NAN:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TRIG_PREOP]], [[NOT_NAN]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %trig.preop, %not.nan
+ ret double %mul
+}
+
+; Extraction from __ocmlpriv_trigredlarge_f64. This should be able to
+; propagate no-nans to the return.
+define double @trig_preop_propagate_nonan(double noundef nofpclass(inf nan) %x){
+; CHECK-LABEL: define noundef nofpclass(nan) double @trig_preop_propagate_nonan(
+; CHECK-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[I2:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 0) #[[ATTR2]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X]], 0x7B00000000000000
+; CHECK-NEXT: [[I9:%.*]] = fmul double [[X]], 0x37F0000000000000
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[I9]], double [[X]]
+; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[I2]], [[COND]]
+; CHECK-NEXT: [[FNEG13:%.*]] = fneg double [[MUL11]]
+; CHECK-NEXT: [[I32:%.*]] = tail call noundef nofpclass(nan) double @llvm.fma.f64(double noundef [[I2]], double noundef [[COND]], double noundef [[FNEG13]]) #[[ATTR2]]
+; CHECK-NEXT: ret double [[I32]]
+;
+entry:
+ %i2 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 0)
+ %i4 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 1)
+ %cmp = fcmp oge double %x, 0x7B00000000000000
+ %i9 = fmul double %x, 0x37F0000000000000
+ %cond = select i1 %cmp, double %i9, double %x
+ %mul4 = fmul double %i4, %cond
+ %mul11 = fmul double %i2, %cond
+ %fneg13 = fneg double %mul11
+ %i32 = tail call double @llvm.fma.f64(double %i2, double %cond, double %fneg13)
+ ret double %i32
+}
+
+; Superset of trig_preop_propagate_nonan. We would like to be able to
+; propagate nonans to the return, but this hits the recursion depth
+; limit.
+define double @trig_preop_propagate_nonan_full(double noundef nofpclass(inf nan) %x) {
+; CHECK-LABEL: define double @trig_preop_propagate_nonan_full(
+; CHECK-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[I2:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 0) #[[ATTR2]]
+; CHECK-NEXT: [[I4:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 1) #[[ATTR2]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X]], 0x7B00000000000000
+; CHECK-NEXT: [[I9:%.*]] = fmul double [[X]], 0x37F0000000000000
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[I9]], double [[X]]
+; CHECK-NEXT: [[MUL4:%.*]] = fmul double [[I4]], [[COND]]
+; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[I2]], [[COND]]
+; CHECK-NEXT: [[FNEG13:%.*]] = fneg double [[MUL11]]
+; CHECK-NEXT: [[I32:%.*]] = tail call double @llvm.fma.f64(double noundef [[I2]], double noundef [[COND]], double noundef [[FNEG13]]) #[[ATTR2]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL4]], [[I32]]
+; CHECK-NEXT: [[ADD57:%.*]] = fadd double [[MUL11]], [[ADD]]
+; CHECK-NEXT: [[I108:%.*]] = fmul double [[ADD57]], 2.500000e-01
+; CHECK-NEXT: ret double [[I108]]
+;
+entry:
+ %i2 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 0)
+ %i4 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 1)
+ %cmp = fcmp oge double %x, 0x7B00000000000000
+ %i9 = fmul double %x, 0x37F0000000000000
+ %cond = select i1 %cmp, double %i9, double %x
+ %mul4 = fmul double %i4, %cond
+ %mul11 = fmul double %i2, %cond
+ %fneg13 = fneg double %mul11
+ %i32 = tail call double @llvm.fma.f64(double %i2, double %cond, double %fneg13)
+ %add = fadd double %mul4, %i32
+ %add57 = fadd double %mul11, %add
+ %i108 = fmul double %add57, 2.500000e-01
+ ret double %i108
+}
>From 7c86ea1c29e6322cdfd1e3ff8ff56d801df01f82 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Feb 2026 16:34:12 +0100
Subject: [PATCH 2/4] cleanup
---
llvm/lib/Analysis/ValueTracking.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5eb048d1fd8ff..dba5ea13fa26d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4968,6 +4968,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
/// \return true if this is a floating point value that is known to have a
/// magintude smaller than 1. i.e., fabs(X) <=1.0
static bool isAbsoluteValueLessEqualOne(const Value *V) {
+ // TODO: Handle frexp and x - floor(x)?
return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
}
@@ -5596,8 +5597,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
const APFloat *CRHS;
if (match(RHS, m_APFloat(CRHS))) {
- computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS,
- Q, Depth + 1);
+ computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownLHS, Q,
+ Depth + 1);
Known = KnownFPClass::fmul(KnownLHS, *CRHS, Mode);
} else {
computeKnownFPClass(RHS, DemandedElts, fcAllFlags, KnownRHS, Q,
>From a90c0d24966bcd30fcf4203848640d2e137a1646 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Feb 2026 16:48:52 +0100
Subject: [PATCH 3/4] Typo fix
---
llvm/lib/Analysis/ValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index dba5ea13fa26d..c743f421026e2 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4966,7 +4966,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
}
/// \return true if this is a floating point value that is known to have a
-/// magintude smaller than 1. i.e., fabs(X) <=1.0
+/// magnitude smaller than 1. i.e., fabs(X) <=1.0
static bool isAbsoluteValueLessEqualOne(const Value *V) {
// TODO: Handle frexp and x - floor(x)?
return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
>From fd63deffdeee8c3e281a9561d5df1801697b4c25 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Feb 2026 16:49:10 +0100
Subject: [PATCH 4/4] Typo fix
---
llvm/lib/Analysis/ValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index c743f421026e2..ce8e27fef5e8a 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4966,7 +4966,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
}
/// \return true if this is a floating point value that is known to have a
-/// magnitude smaller than 1. i.e., fabs(X) <=1.0
+/// magnitude smaller than 1. i.e., fabs(X) <= 1.0
static bool isAbsoluteValueLessEqualOne(const Value *V) {
// TODO: Handle frexp and x - floor(x)?
return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
More information about the llvm-branch-commits
mailing list