[llvm] [ValueTracking] Propagate sign information out of loop (PR #175590)

Kshitij Paranjape via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 16 12:32:54 PST 2026


https://github.com/kshitijvp updated https://github.com/llvm/llvm-project/pull/175590

>From 6cc88734604fb7aeca93cce1d24c2c73fe1dd4f5 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Mon, 12 Jan 2026 22:20:27 +0530
Subject: [PATCH 1/3] [AggressiveInstCombine] Propagate sign information out of
 loop

LLVM converts a sqrt libcall to an intrinsic call only if the argument
is known to be in range (greater than or equal to 0.0). For a loop that
starts from a non-negative value and is incremented by a squared value,
the compiler is not able to deduce the non-negativity on its own.
Extend ValueTracking to understand such loops.

Fixes llvm/llvm-project#174813
---
 llvm/lib/Analysis/ValueTracking.cpp           | 20 +++++++-
 .../libcalltointrinsic.ll                     | 48 +++++++++++++++++++
 2 files changed, 67 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index dbb44c8828545..65822a62c6f3d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5953,7 +5953,25 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
     // Unreachable blocks may have zero-operand PHI nodes.
     if (P->getNumIncomingValues() == 0)
       break;
-
+    // Look for the case of a for loop which has a positive
+    // initial value and is incremented by a squared value.
+    // This will propogate sign information out of such loops.
+    if (P->getNumIncomingValues() == 2) {
+      Value *Start = P->getIncomingValue(0);
+      Value *RecurValue = P->getIncomingValue(1);
+      Value *X;
+      if (match(RecurValue,
+                m_Intrinsic<Intrinsic::fmuladd>(m_Value(X), m_Value(X), m_Specific(P)))) {
+        KnownFPClass KnownStart;
+        computeKnownFPClass(Start, DemandedElts,
+                            KnownFPClass::OrderedLessThanZeroMask, KnownStart,
+                            Q, Depth + 1);
+        if (KnownStart.cannotBeOrderedLessThanZero()) {
+          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+        }
+        break;
+      }
+    }
     // Otherwise take the unions of the known bit sets of the operands,
     // taking conservative care to avoid excessive recursion.
     const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
diff --git a/llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll b/llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll
new file mode 100644
index 0000000000000..a734abb6727e3
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local noundef double @CompareDistmats(ptr noundef %distmat1, ptr noundef %distmat2) local_unnamed_addr {
+; CHECK-LABEL: @CompareDistmats(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[RMSD_0:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = phi i1 [ true, [[ENTRY]] ], [ false, [[FOR_BODY]] ]
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[RMSD_0]])
+; CHECK-NEXT:    ret double [[SQRT]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[DISTMAT1:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[DISTMAT2:%.*]], align 8
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP2]] = call double @llvm.fmuladd.f64(double [[SUB]], double [[SUB]], double [[RMSD_0]])
+; CHECK-NEXT:    br label [[FOR_COND]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %RMSD.0 = phi double [ 0.000000e+00, %entry ], [ %2, %for.body ]
+  %cmp = phi i1 [ true, %entry ], [ false, %for.body ]
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  %call = call double @sqrt(double noundef %RMSD.0)
+  ret double %call
+
+for.body:                                         ; preds = %for.cond
+  %0 = load double, ptr %distmat1, align 8
+  %1 = load double, ptr %distmat2, align 8
+  %sub = fsub double %0, %1
+  %2 = call double @llvm.fmuladd.f64(double %sub, double %sub, double %RMSD.0)
+  br label %for.cond
+}
+
+; Function Attrs: mustprogress nofree nounwind willreturn memory(write)
+declare double @sqrt(double noundef) local_unnamed_addr
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare double @llvm.fmuladd.f64(double, double, double)

>From a308a16c5505b0e8f4cafff3da0432a5c8104baf Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 17 Jan 2026 00:33:51 +0530
Subject: [PATCH 2/3] Added simple recurrence pattern matcher for ternary
 intrinsics

Added functions to match simple recurrence patterns for ternary
intrinsics, as previously only binary intrinsics were supported.
Used matchSimpleTernaryIntrinsicRecurrence to match the specific
case of a for loop in which the initial value is zero and is
incremented by a squared value. Moved the tests to the X86
subdirectory and also added a negative test for the case where
the value is not incremented by a squared value.
---
 llvm/include/llvm/Analysis/ValueTracking.h    |  4 +
 llvm/lib/Analysis/ValueTracking.cpp           | 84 +++++++++++++++----
 .../AggressiveInstCombine/X86/pr175590.ll     | 45 ++++++++++
 .../libcalltointrinsic.ll                     | 48 -----------
 4 files changed, 119 insertions(+), 62 deletions(-)
 create mode 100644 llvm/test/Transforms/AggressiveInstCombine/X86/pr175590.ll
 delete mode 100644 llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll

diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 2ce49c558b241..62ff63e2bfb5c 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -1006,6 +1006,10 @@ LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I,
 ///  T | T | F
 ///  F | T | T
 /// (A)
+LLVM_ABI bool matchSimpleTernaryIntrinsicRecurrence(const IntrinsicInst *I,
+                                                    PHINode *&P, Value *&Init,
+                                                    Value *&OtherOp0,
+                                                    Value *&OtherOp1);
 LLVM_ABI std::optional<bool>
 isImpliedCondition(const Value *LHS, const Value *RHS, const DataLayout &DL,
                    bool LHSIsTrue = true, unsigned Depth = 0);
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 65822a62c6f3d..557d2f407a7ee 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1771,7 +1771,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
     const PHINode *P = cast<PHINode>(I);
     BinaryOperator *BO = nullptr;
     Value *R = nullptr, *L = nullptr;
-    if (matchSimpleRecurrence(P, BO, R, L)) {
+    if (llvm::matchSimpleRecurrence(P, BO, R, L)) {
       // Handle the case of a simple two-predecessor recurrence PHI.
       // There's a lot more that could theoretically be done here, but
       // this is sufficient to catch some interesting cases.
@@ -5955,21 +5955,26 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
       break;
     // Look for the case of a for loop which has a positive
     // initial value and is incremented by a squared value.
-    // This will propogate sign information out of such loops.
+    // This will propagate sign information out of such loops.
     if (P->getNumIncomingValues() == 2) {
-      Value *Start = P->getIncomingValue(0);
       Value *RecurValue = P->getIncomingValue(1);
-      Value *X;
-      if (match(RecurValue,
-                m_Intrinsic<Intrinsic::fmuladd>(m_Value(X), m_Value(X), m_Specific(P)))) {
-        KnownFPClass KnownStart;
-        computeKnownFPClass(Start, DemandedElts,
-                            KnownFPClass::OrderedLessThanZeroMask, KnownStart,
-                            Q, Depth + 1);
-        if (KnownStart.cannotBeOrderedLessThanZero()) {
-          Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+      IntrinsicInst* I = dyn_cast<IntrinsicInst>(RecurValue);
+      Value *R, *L;
+      Value *Init;
+      PHINode *PN;
+      if (matchSimpleTernaryIntrinsicRecurrence(I, PN, Init, L, R)) {
+        switch(I->getIntrinsicID()) {
+        case Intrinsic::fmuladd: {
+          KnownFPClass KnownStart;
+          computeKnownFPClass(Init, DemandedElts, 
+                              KnownFPClass::OrderedGreaterThanZeroMask, KnownStart,
+                              Q, Depth + 1 );
+          if (KnownStart.cannotBeOrderedLessThanZero() && R == L) {
+            Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+          }
+          break;
+        }
         }
-        break;
       }
     }
     // Otherwise take the unions of the known bit sets of the operands,
@@ -9288,6 +9293,40 @@ static bool matchTwoInputRecurrence(const PHINode *PN, InstTy *&Inst,
   return false;
 }
 
+/// Match a two-input PHI \p PN fed by an instruction of type InstTy that uses
+/// \p PN as one of its first three operands. On success, \p Inst is that
+/// instruction, \p Init is the other incoming value of \p PN, and
+/// \p OtherOp0 / \p OtherOp1 are the two remaining operands, in operand
+/// order.
+template <typename InstTy>
+static bool matchThreeInputRecurrence(const PHINode *PN, InstTy *&Inst,
+                                      Value *&Init, Value *&OtherOp0,
+                                      Value *&OtherOp1) {
+  if (PN->getNumIncomingValues() != 2)
+    return false;
+
+  // PN has exactly two incoming values, so only indices 0 and 1 are valid.
+  for (unsigned I = 0; I != 2; ++I) {
+    // Require three operands so the getOperand(2) below stays in bounds.
+    if (auto *Operation = dyn_cast<InstTy>(PN->getIncomingValue(I));
+        Operation && Operation->getNumOperands() >= 3) {
+      Value *Op0 = Operation->getOperand(0);
+      Value *Op1 = Operation->getOperand(1);
+      Value *Op2 = Operation->getOperand(2);
+      // This incoming value does not use PN; try the other one.
+      if (Op0 != PN && Op1 != PN && Op2 != PN)
+        continue;
+
+      Inst = Operation;
+      Init = PN->getIncomingValue(!I);
+      // The first operand that equals PN is dropped; the rest keep their order.
+      OtherOp0 = Op0 == PN ? Op1 : Op0;
+      OtherOp1 = (Op0 == PN || Op1 == PN) ? Op2 : Op1;
+      return true;
+    }
+  }
+  return false;
+}
 bool llvm::matchSimpleRecurrence(const PHINode *P, BinaryOperator *&BO,
                                  Value *&Start, Value *&Step) {
   // We try to match a recurrence of the form:
@@ -9324,6 +9363,23 @@ bool llvm::matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I,
   return P && matchTwoInputRecurrence(P, II, Init, OtherOp) && II == I;
 }
 
+bool llvm::matchSimpleTernaryIntrinsicRecurrence(const IntrinsicInst *I,
+                                                  PHINode *&P, Value *&Init,
+                                                  Value *&OtherOp0, Value *&OtherOp1) {
+  // I may be a failed dyn_cast result (null); such a call never matches.
+  if (!I || I->arg_size() != 3 || I->getType() != I->getArgOperand(0)->getType() ||
+      I->getType() != I->getArgOperand(1)->getType() ||
+      I->getType() != I->getArgOperand(2)->getType())
+    return false;
+  // Any argument may be the recurrence PHI, so try each PHI argument in turn.
+  IntrinsicInst *II = nullptr;
+  for (Value *Arg : I->args())
+    if ((P = dyn_cast<PHINode>(Arg)) &&
+        matchThreeInputRecurrence(P, II, Init, OtherOp0, OtherOp1) && II == I)
+      return true;
+  return false;
+}
+
 /// Return true if "icmp Pred LHS RHS" is always true.
 static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
                             const Value *RHS) {
@@ -10560,4 +10616,4 @@ bool llvm::collectPossibleValues(const Value *V,
     }
   }
   return true;
-}
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/pr175590.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/pr175590.ll
new file mode 100644
index 0000000000000..c504762af8cae
--- /dev/null
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/pr175590.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+define dso_local noundef double @CompareDistmats(double noundef %distmat1_, double noundef %distmat2_) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef double @CompareDistmats(
+; CHECK-SAME: double noundef [[DISTMAT1_:%.*]], double noundef [[DISTMAT2_:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[DISTMAT1_]], [[DISTMAT2_]]
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call double @llvm.fmuladd.f64(double [[SUB]], double [[SUB]], double 0.000000e+00)
+; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[TMP0]])
+; CHECK-NEXT:    ret double [[SQRT]]
+;
+entry:
+  %sub = fsub double %distmat1_, %distmat2_
+  %fmacall = tail call double @llvm.fmuladd.f64(double %sub, double %sub, double 0.000000e+00)
+  %call = tail call double @sqrt(double noundef %fmacall) #3
+  ret double %call
+}
+
+define dso_local noundef double @nonSquareCompareDistmats(double noundef %distmat1_, double noundef %distmat2_) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local noundef double @nonSquareCompareDistmats(
+; CHECK-SAME: double noundef [[DISTMAT1_:%.*]], double noundef [[DISTMAT2_:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[DISTMAT1_]], [[DISTMAT2_]]
+; CHECK-NEXT:    [[ADD:%.*]] = fsub double [[DISTMAT1_]], [[DISTMAT2_]]
+; CHECK-NEXT:    [[FMACALL:%.*]] = tail call double @llvm.fmuladd.f64(double [[SUB]], double [[ADD]], double 0.000000e+00)
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double @sqrt(double noundef [[FMACALL]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    ret double [[CALL]]
+;
+entry:
+  %sub = fsub double %distmat1_, %distmat2_
+  %add = fsub double %distmat1_, %distmat2_
+  %fmacall = tail call double @llvm.fmuladd.f64(double %sub, double %add, double 0.000000e+00)
+  %call = tail call double @sqrt(double noundef %fmacall) #3
+  ret double %call
+}
+
+declare double @llvm.fmuladd.f64(double, double, double) #1
+
+declare double @sqrt(double noundef) local_unnamed_addr #2
+
+attributes #0 = { mustprogress nofree norecurse nounwind willreturn memory(errnomem: write) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="znver3" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+clwb,+clzero,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mwaitx,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
+attributes #1 = { mustprogress nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { mustprogress nocallback nofree nounwind willreturn memory(errnomem: write) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="znver3" "target-features"="+adx,+aes,+avx,+avx2,+bmi,+bmi2,+clflushopt,+clwb,+clzero,+crc32,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+mwaitx,+pclmul,+pku,+popcnt,+prfchw,+rdpid,+rdpru,+rdrnd,+rdseed,+sahf,+sha,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+sse4a,+ssse3,+vaes,+vpclmulqdq,+wbnoinvd,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" }
+attributes #3 = { nounwind }
diff --git a/llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll b/llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll
deleted file mode 100644
index a734abb6727e3..0000000000000
--- a/llvm/test/Transforms/AggressiveInstCombine/libcalltointrinsic.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=aggressive-instcombine -S | FileCheck %s
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local noundef double @CompareDistmats(ptr noundef %distmat1, ptr noundef %distmat2) local_unnamed_addr {
-; CHECK-LABEL: @CompareDistmats(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[FOR_COND:%.*]]
-; CHECK:       for.cond:
-; CHECK-NEXT:    [[RMSD_0:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[FOR_BODY:%.*]] ]
-; CHECK-NEXT:    [[CMP:%.*]] = phi i1 [ true, [[ENTRY]] ], [ false, [[FOR_BODY]] ]
-; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
-; CHECK:       for.cond.cleanup:
-; CHECK-NEXT:    [[SQRT:%.*]] = call double @llvm.sqrt.f64(double [[RMSD_0]])
-; CHECK-NEXT:    ret double [[SQRT]]
-; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[DISTMAT1:%.*]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[DISTMAT2:%.*]], align 8
-; CHECK-NEXT:    [[SUB:%.*]] = fsub double [[TMP0]], [[TMP1]]
-; CHECK-NEXT:    [[TMP2]] = call double @llvm.fmuladd.f64(double [[SUB]], double [[SUB]], double [[RMSD_0]])
-; CHECK-NEXT:    br label [[FOR_COND]]
-;
-entry:
-  br label %for.cond
-
-for.cond:                                         ; preds = %for.body, %entry
-  %RMSD.0 = phi double [ 0.000000e+00, %entry ], [ %2, %for.body ]
-  %cmp = phi i1 [ true, %entry ], [ false, %for.body ]
-  br i1 %cmp, label %for.body, label %for.cond.cleanup
-
-for.cond.cleanup:                                 ; preds = %for.cond
-  %call = call double @sqrt(double noundef %RMSD.0)
-  ret double %call
-
-for.body:                                         ; preds = %for.cond
-  %0 = load double, ptr %distmat1, align 8
-  %1 = load double, ptr %distmat2, align 8
-  %sub = fsub double %0, %1
-  %2 = call double @llvm.fmuladd.f64(double %sub, double %sub, double %RMSD.0)
-  br label %for.cond
-}
-
-; Function Attrs: mustprogress nofree nounwind willreturn memory(write)
-declare double @sqrt(double noundef) local_unnamed_addr
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare double @llvm.fmuladd.f64(double, double, double)

>From c58a1bf2c3f10400cfee6d53191da3a264ba61e6 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 17 Jan 2026 02:01:15 +0530
Subject: [PATCH 3/3] Check for null value after dyn_cast

---
 llvm/lib/Analysis/ValueTracking.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 557d2f407a7ee..d4949f7551468 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5959,6 +5959,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
     if (P->getNumIncomingValues() == 2) {
       Value *RecurValue = P->getIncomingValue(1);
       IntrinsicInst* I = dyn_cast<IntrinsicInst>(RecurValue);
+      if (!I)
+        break;
       Value *R, *L;
       Value *Init;
       PHINode *PN;



More information about the llvm-commits mailing list