[llvm-branch-commits] [llvm] ValueTracking: Special case fmul by llvm.amdgcn.trig.preop (PR #183373)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 26 07:49:20 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/183373
>From 618e4f59e6ccdc663680253a9962f58abfb275e3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 25 Feb 2026 15:45:08 +0100
Subject: [PATCH 1/4] ValueTracking: Special case fmul by
llvm.amdgcn.trig.preop
This is another instance of the logic from #183159. If we know
one source is not-infinity, and the other source is less than or
equal to 1, this cannot overflow. Special case llvm.amdgcn.trig.preop,
as a substitute for proper range tracking. This almost enables pruning
edge case handling in trig function implementations, if not for the
recursion depth limit (but that's a problem for another day).
---
llvm/lib/Analysis/ValueTracking.cpp | 37 ++++--
.../AMDGPU/nofpclass-amdgcn-trig-preop.ll | 113 ++++++++++++++++++
2 files changed, 138 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 2684b41cd1e5d..5eb048d1fd8ff 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4965,6 +4965,12 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
}
}
+/// \return true if this is a floating point value that is known to have a
+/// magintude smaller than 1. i.e., fabs(X) <=1.0
+static bool isAbsoluteValueLessEqualOne(const Value *V) {
+ return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
+}
+
void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
FPClassTest InterestedClasses, KnownFPClass &Known,
const SimplifyQuery &Q, unsigned Depth) {
@@ -5574,37 +5580,44 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
Op->getType()->getScalarType()->getFltSemantics())
: DenormalMode::getDynamic();
+ Value *LHS = Op->getOperand(0);
+ Value *RHS = Op->getOperand(1);
// X * X is always non-negative or a NaN.
// FIXME: Should check isGuaranteedNotToBeUndef
- if (Op->getOperand(0) == Op->getOperand(1)) {
+ if (LHS == RHS) {
KnownFPClass KnownSrc;
- computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownSrc,
- Q, Depth + 1);
+ computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownSrc, Q,
+ Depth + 1);
Known = KnownFPClass::square(KnownSrc, Mode);
break;
}
+ KnownFPClass KnownLHS, KnownRHS;
+
const APFloat *CRHS;
- if (match(Op->getOperand(1), m_APFloat(CRHS))) {
- KnownFPClass KnownLHS;
+ if (match(RHS, m_APFloat(CRHS))) {
computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS,
Q, Depth + 1);
-
Known = KnownFPClass::fmul(KnownLHS, *CRHS, Mode);
} else {
- KnownFPClass KnownLHS, KnownRHS;
-
- computeKnownFPClass(Op->getOperand(1), DemandedElts, fcAllFlags, KnownRHS,
- Q, Depth + 1);
+ computeKnownFPClass(RHS, DemandedElts, fcAllFlags, KnownRHS, Q,
+ Depth + 1);
// TODO: Improve accuracy in unfused FMA pattern. We can prove an
// additional not-nan if the addend is known-not negative infinity if the
// multiply is known-not infinity.
- computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS,
- Q, Depth + 1);
+ computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownLHS, Q,
+ Depth + 1);
Known = KnownFPClass::fmul(KnownLHS, KnownRHS, Mode);
}
+ /// Propagate no-infs if the other source is known smaller than one, such
+ /// that this cannot introduce overflow.
+ if (KnownLHS.isKnownNever(fcInf) && isAbsoluteValueLessEqualOne(RHS))
+ Known.knownNot(fcInf);
+ else if (KnownRHS.isKnownNever(fcInf) && isAbsoluteValueLessEqualOne(LHS))
+ Known.knownNot(fcInf);
+
break;
}
case Instruction::FDiv:
diff --git a/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll b/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll
index fe6e939664161..4cb217de2b1eb 100644
--- a/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll
+++ b/llvm/test/Transforms/Attributor/AMDGPU/nofpclass-amdgcn-trig-preop.ll
@@ -10,3 +10,116 @@ define double @ret_trig_preop_f64(double %x, i32 %n) {
%ret = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
ret double %ret
}
+
+define double @ret_not_inf__fmul__trig_preop(double nofpclass(inf) %not.inf, double %x, i32 %n) {
+; CHECK-LABEL: define nofpclass(inf) double @ret_not_inf__fmul__trig_preop(
+; CHECK-SAME: double nofpclass(inf) [[NOT_INF:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[NOT_INF]], [[TRIG_PREOP]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %not.inf, %trig.preop
+ ret double %mul
+}
+
+define double @ret_trig_preop__fmul__not_inf(double nofpclass(inf) %not.inf, double %x, i32 %n) {
+; CHECK-LABEL: define nofpclass(inf) double @ret_trig_preop__fmul__not_inf(
+; CHECK-SAME: double nofpclass(inf) [[NOT_INF:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TRIG_PREOP]], [[NOT_INF]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %trig.preop, %not.inf
+ ret double %mul
+}
+
+define double @ret_not_nan__fmul__trig_preop(double nofpclass(nan) %not.nan, double %x, i32 %n) {
+; CHECK-LABEL: define double @ret_not_nan__fmul__trig_preop(
+; CHECK-SAME: double nofpclass(nan) [[NOT_NAN:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[NOT_NAN]], [[TRIG_PREOP]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %not.nan, %trig.preop
+ ret double %mul
+}
+
+define double @ret_trig_preop__fmul__not_nan(double nofpclass(nan) %not.nan, double %x, i32 %n) {
+; CHECK-LABEL: define double @ret_trig_preop__fmul__not_nan(
+; CHECK-SAME: double nofpclass(nan) [[NOT_NAN:%.*]], double [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TRIG_PREOP:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double [[X]], i32 [[N]]) #[[ATTR2]]
+; CHECK-NEXT: [[MUL:%.*]] = fmul double [[TRIG_PREOP]], [[NOT_NAN]]
+; CHECK-NEXT: ret double [[MUL]]
+;
+ %trig.preop = call double @llvm.amdgcn.trig.preop.f64(double %x, i32 %n)
+ %mul = fmul double %trig.preop, %not.nan
+ ret double %mul
+}
+
+; Extraction from __ocmlpriv_trigredlarge_f64. This should be able to
+; propagate no-nans to the return.
+define double @trig_preop_propagate_nonan(double noundef nofpclass(inf nan) %x){
+; CHECK-LABEL: define noundef nofpclass(nan) double @trig_preop_propagate_nonan(
+; CHECK-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[I2:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 0) #[[ATTR2]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X]], 0x7B00000000000000
+; CHECK-NEXT: [[I9:%.*]] = fmul double [[X]], 0x37F0000000000000
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[I9]], double [[X]]
+; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[I2]], [[COND]]
+; CHECK-NEXT: [[FNEG13:%.*]] = fneg double [[MUL11]]
+; CHECK-NEXT: [[I32:%.*]] = tail call noundef nofpclass(nan) double @llvm.fma.f64(double noundef [[I2]], double noundef [[COND]], double noundef [[FNEG13]]) #[[ATTR2]]
+; CHECK-NEXT: ret double [[I32]]
+;
+entry:
+ %i2 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 0)
+ %i4 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 1)
+ %cmp = fcmp oge double %x, 0x7B00000000000000
+ %i9 = fmul double %x, 0x37F0000000000000
+ %cond = select i1 %cmp, double %i9, double %x
+ %mul4 = fmul double %i4, %cond
+ %mul11 = fmul double %i2, %cond
+ %fneg13 = fneg double %mul11
+ %i32 = tail call double @llvm.fma.f64(double %i2, double %cond, double %fneg13)
+ ret double %i32
+}
+
+; Superset of trig_preop_propagate_nonan. We would like to be able to
+; propagate nonans to the return, but this hits the recursion depth
+; limit.
+define double @trig_preop_propagate_nonan_full(double noundef nofpclass(inf nan) %x) {
+; CHECK-LABEL: define double @trig_preop_propagate_nonan_full(
+; CHECK-SAME: double noundef nofpclass(nan inf) [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[I2:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 0) #[[ATTR2]]
+; CHECK-NEXT: [[I4:%.*]] = tail call double @llvm.amdgcn.trig.preop.f64(double noundef nofpclass(nan inf) [[X]], i32 noundef 1) #[[ATTR2]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[X]], 0x7B00000000000000
+; CHECK-NEXT: [[I9:%.*]] = fmul double [[X]], 0x37F0000000000000
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], double [[I9]], double [[X]]
+; CHECK-NEXT: [[MUL4:%.*]] = fmul double [[I4]], [[COND]]
+; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[I2]], [[COND]]
+; CHECK-NEXT: [[FNEG13:%.*]] = fneg double [[MUL11]]
+; CHECK-NEXT: [[I32:%.*]] = tail call double @llvm.fma.f64(double noundef [[I2]], double noundef [[COND]], double noundef [[FNEG13]]) #[[ATTR2]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd double [[MUL4]], [[I32]]
+; CHECK-NEXT: [[ADD57:%.*]] = fadd double [[MUL11]], [[ADD]]
+; CHECK-NEXT: [[I108:%.*]] = fmul double [[ADD57]], 2.500000e-01
+; CHECK-NEXT: ret double [[I108]]
+;
+entry:
+ %i2 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 0)
+ %i4 = tail call double @llvm.amdgcn.trig.preop.f64(double %x, i32 1)
+ %cmp = fcmp oge double %x, 0x7B00000000000000
+ %i9 = fmul double %x, 0x37F0000000000000
+ %cond = select i1 %cmp, double %i9, double %x
+ %mul4 = fmul double %i4, %cond
+ %mul11 = fmul double %i2, %cond
+ %fneg13 = fneg double %mul11
+ %i32 = tail call double @llvm.fma.f64(double %i2, double %cond, double %fneg13)
+ %add = fadd double %mul4, %i32
+ %add57 = fadd double %mul11, %add
+ %i108 = fmul double %add57, 2.500000e-01
+ ret double %i108
+}
>From 7c86ea1c29e6322cdfd1e3ff8ff56d801df01f82 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Feb 2026 16:34:12 +0100
Subject: [PATCH 2/4] cleanup
---
llvm/lib/Analysis/ValueTracking.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 5eb048d1fd8ff..dba5ea13fa26d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4968,6 +4968,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
/// \return true if this is a floating point value that is known to have a
/// magintude smaller than 1. i.e., fabs(X) <=1.0
static bool isAbsoluteValueLessEqualOne(const Value *V) {
+ // TODO: Handle frexp and x - floor(x)?
return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
}
@@ -5596,8 +5597,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
const APFloat *CRHS;
if (match(RHS, m_APFloat(CRHS))) {
- computeKnownFPClass(Op->getOperand(0), DemandedElts, fcAllFlags, KnownLHS,
- Q, Depth + 1);
+ computeKnownFPClass(LHS, DemandedElts, fcAllFlags, KnownLHS, Q,
+ Depth + 1);
Known = KnownFPClass::fmul(KnownLHS, *CRHS, Mode);
} else {
computeKnownFPClass(RHS, DemandedElts, fcAllFlags, KnownRHS, Q,
>From a90c0d24966bcd30fcf4203848640d2e137a1646 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Feb 2026 16:48:52 +0100
Subject: [PATCH 3/4] Typo fix
---
llvm/lib/Analysis/ValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index dba5ea13fa26d..c743f421026e2 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4966,7 +4966,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
}
/// \return true if this is a floating point value that is known to have a
-/// magintude smaller than 1. i.e., fabs(X) <=1.0
+/// magnitude smaller than 1. i.e., fabs(X) <=1.0
static bool isAbsoluteValueLessEqualOne(const Value *V) {
// TODO: Handle frexp and x - floor(x)?
return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
>From fd63deffdeee8c3e281a9561d5df1801697b4c25 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 26 Feb 2026 16:49:10 +0100
Subject: [PATCH 4/4] Typo fix
---
llvm/lib/Analysis/ValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index c743f421026e2..ce8e27fef5e8a 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -4966,7 +4966,7 @@ static constexpr KnownFPClass::MinMaxKind getMinMaxKind(Intrinsic::ID IID) {
}
/// \return true if this is a floating point value that is known to have a
-/// magnitude smaller than 1. i.e., fabs(X) <=1.0
+/// magnitude smaller than 1. i.e., fabs(X) <= 1.0
static bool isAbsoluteValueLessEqualOne(const Value *V) {
// TODO: Handle frexp and x - floor(x)?
return match(V, m_Intrinsic<Intrinsic::amdgcn_trig_preop>(m_Value()));
More information about the llvm-branch-commits
mailing list