[llvm] [InlineCost]: Optimize inlining of recursive function. (PR #139982)

Hassnaa Hamdi via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 2 06:44:47 PDT 2025


https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/139982

>From c219bb03e15a82b638ebf96005667017331fc4c4 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 14 May 2025 03:50:01 +0000
Subject: [PATCH 1/6] [ValueTracking][NFC]: Use injected condition to compute
 known FPClass

---
 llvm/include/llvm/Analysis/SimplifyQuery.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index d1d34f22a2fc5..7c13880ab0bd6 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -63,6 +63,8 @@ struct InstrInfoQuery {
 struct CondContext {
   Value *Cond;
   bool Invert = false;
+  // Condition is true if CxtI is in the true successor of Cond.
+  bool CondIsTrue = false;
   SmallPtrSet<Value *, 4> AffectedValues;
 
   CondContext(Value *Cond) : Cond(Cond) {}

>From d4d36df22ee56c90a2f43afa4050b3d73358e92a Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Fri, 30 May 2025 16:44:40 +0000
Subject: [PATCH 2/6] Use !CondContext.Invert instead of CondIsTrue

---
 llvm/include/llvm/Analysis/SimplifyQuery.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index 7c13880ab0bd6..d1d34f22a2fc5 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -63,8 +63,6 @@ struct InstrInfoQuery {
 struct CondContext {
   Value *Cond;
   bool Invert = false;
-  // Condition is true if CxtI is in the true successor of Cond.
-  bool CondIsTrue = false;
   SmallPtrSet<Value *, 4> AffectedValues;
 
   CondContext(Value *Cond) : Cond(Cond) {}

>From c4004e3e923de5836dc167022e531aed297d5414 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 14 May 2025 22:15:34 +0000
Subject: [PATCH 3/6] [InlineCost][precommit]: Add test file

---
 .../Transforms/Inline/inline-recursive-fn2.ll | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/inline-recursive-fn2.ll

diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
new file mode 100644
index 0000000000000..cda08b613ddb8
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -0,0 +1,39 @@
+; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK:  Inlining calls in: test
+; CHECK:      Function size: 2
+; CHECK:      Inlining (cost=-35, threshold=337), Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+; CHECK:      Size after inlining: 10
+
+; CHECK:  Inlining calls in: inline_rec_true_successor
+; CHECK:      Function size: 10
+; CHECK:      Inlining (cost=-35, threshold=337), Call:   %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+; CHECK:      Size after inlining: 17
+; CHECK:      NOT Inlining (cost=never): noinline function attribute, Call:   %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
+
+
+define float @test(float %x, float %scale) noinline {
+entry:
+  %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+  ret float %call
+}
+
+define float @inline_rec_true_successor(float %x, float %scale)  {
+entry:
+  %cmp = fcmp olt float %x, 0.000000e+00
+  br i1 %cmp, label %if.then, label %if.end
+
+common.ret18:                                     ; preds = %if.then, %if.end
+  %common.ret18.op = phi float [ %call_test, %if.then ], [ %mul, %if.end ]
+  ret float %common.ret18.op
+
+if.then:                                          ; preds = %entry
+  %fneg = fneg float %x
+  %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+  %call_test = tail call float @test(float %fneg, float %call)
+  br label %common.ret18
+
+if.end:                                           ; preds = %entry
+  %mul = fmul float %x, %scale
+  br label %common.ret18
+}

>From ae2a97f7d395a4805d0b1382bb78f59c3fdd4eee Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 14 May 2025 22:50:26 +0000
Subject: [PATCH 4/6] [InlineCost]: Optimize inlining of recursive function.

- Consider inlining recursive function of depth 1 only when
the caller is the function itself instead of inlining it
for each callsite so that we avoid redundant work.

- Use CondContext instead of DomTree for better compilation time.
---
 llvm/lib/Analysis/InlineCost.cpp              | 102 ++++++++----------
 .../Transforms/Inline/inline-recursive-fn2.ll |  10 +-
 2 files changed, 52 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 8ddfa1e4eb6f7..c51bc12acc168 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1688,66 +1688,52 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
   if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
     return false;
   auto *CmpOp = Cmp.getOperand(0);
-  Function *F = Cmp.getFunction();
-  // Iterate over the users of the function to check if it's a recursive
-  // function:
-  for (auto *U : F->users()) {
-    CallInst *Call = dyn_cast<CallInst>(U);
-    if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F)
-      continue;
-    auto *CallBB = Call->getParent();
-    auto *Predecessor = CallBB->getSinglePredecessor();
-    // Only handle the case when the callsite has a single predecessor:
-    if (!Predecessor)
-      continue;
+  // Make sure that the callsite is recursive:
+  if (CandidateCall.getCaller() != &F)
+    return false;
+  CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);
+  // Only handle the case when the callsite has a single predecessor:
+  auto *CallBB = CallInstr->getParent();
+  auto *Predecessor = CallBB->getSinglePredecessor();
+  if (!Predecessor)
+    return false;
+  // Check if the callsite is guarded by the same Cmp instruction:
+  auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
+  if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
+    return false;
 
-    auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
-    if (!Br || Br->isUnconditional())
-      continue;
-    // Check if the Br condition is the same Cmp instr we are investigating:
-    if (Br->getCondition() != &Cmp)
-      continue;
-    // Check if there are any arg of the recursive callsite is affecting the cmp
-    // instr:
-    bool ArgFound = false;
-    Value *FuncArg = nullptr, *CallArg = nullptr;
-    for (unsigned ArgNum = 0;
-         ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) {
-      FuncArg = F->getArg(ArgNum);
-      CallArg = Call->getArgOperand(ArgNum);
-      if (FuncArg == CmpOp && CallArg != CmpOp) {
-        ArgFound = true;
-        break;
-      }
-    }
-    if (!ArgFound)
-      continue;
-    // Now we have a recursive call that is guarded by a cmp instruction.
-    // Check if this cmp can be simplified:
-    SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
-    DomConditionCache DC;
-    DC.registerBranch(Br);
-    SQ.DC = &DC;
-    if (DT.root_size() == 0) {
-      // Dominator tree was never constructed for any function yet.
-      DT.recalculate(*F);
-    } else if (DT.getRoot()->getParent() != F) {
-      // Dominator tree was constructed for a different function, recalculate
-      // it for the current function.
-      DT.recalculate(*F);
+  // Check if there is any arg of the recursive callsite is affecting the cmp
+  // instr:
+  bool ArgFound = false;
+  Value *FuncArg = nullptr, *CallArg = nullptr;
+  for (unsigned ArgNum = 0;
+       ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) {
+    FuncArg = F.getArg(ArgNum);
+    CallArg = CallInstr->getArgOperand(ArgNum);
+    if (FuncArg == CmpOp && CallArg != CmpOp) {
+      ArgFound = true;
+      break;
     }
-    SQ.DT = &DT;
-    Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
-        cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
-    if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
-      bool IsTrueSuccessor = CallBB == Br->getSuccessor(0);
-      // Make sure that the BB of the recursive call is NOT the next successor
-      // of the icmp. In other words, make sure that the recursion depth is 1.
-      if ((ConstVal->isOne() && !IsTrueSuccessor) ||
-          (ConstVal->isZero() && IsTrueSuccessor)) {
-        SimplifiedValues[&Cmp] = ConstVal;
-        return true;
-      }
+  }
+  if (!ArgFound)
+    return false;
+
+  // Now we have a recursive call that is guarded by a cmp instruction.
+  // Check if this cmp can be simplified:
+  SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
+  CondContext CC(cast<Value>(&Cmp));
+  CC.CondIsTrue = CallBB == Br->getSuccessor(0);
+  SQ.CC = &CC;
+  CC.AffectedValues.insert(FuncArg);
+  Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
+      cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
+  if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
+    // Make sure that the BB of the recursive call is NOT the true successor
+    // of the icmp. In other words, make sure that the recursion depth is 1.
+    if ((ConstVal->isOne() && !CC.CondIsTrue) ||
+        (ConstVal->isZero() && CC.CondIsTrue)) {
+      SimplifiedValues[&Cmp] = ConstVal;
+      return true;
     }
   }
   return false;
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
index cda08b613ddb8..0323a6ee3a75a 100644
--- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -2,15 +2,21 @@
 
 ; CHECK:  Inlining calls in: test
 ; CHECK:      Function size: 2
-; CHECK:      Inlining (cost=-35, threshold=337), Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)
-; CHECK:      Size after inlining: 10
+; CHECK:      NOT Inlining (cost=never): recursive, Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)
 
 ; CHECK:  Inlining calls in: inline_rec_true_successor
 ; CHECK:      Function size: 10
 ; CHECK:      Inlining (cost=-35, threshold=337), Call:   %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
 ; CHECK:      Size after inlining: 17
 ; CHECK:      NOT Inlining (cost=never): noinline function attribute, Call:   %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
+; CHECK:      NOT Inlining (cost=never): noinline function attribute, Call:   %call_test.i = tail call float @test(float %x, float %call.i)
+; CHECK:  Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor
+; CHECK:  Updated inlining SCC: (test, inline_rec_true_successor)
 
+; CHECK:  Inlining calls in: test
+; CHECK:      Function size: 2
+; CHECK:      Inlining (cost=25, threshold=225), Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+; CHECK:      Size after inlining: 10
 
 define float @test(float %x, float %scale) noinline {
 entry:

>From 07181585fcc6cb24923acbc4329d4636af9ebb7a Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Sun, 1 Jun 2025 18:17:25 +0000
Subject: [PATCH 5/6] resolve comments

---
 llvm/lib/Analysis/InlineCost.cpp                | 17 +++++++----------
 .../Transforms/Inline/inline-recursive-fn2.ll   |  1 +
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index c51bc12acc168..7bd1f18004580 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -263,8 +263,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   // Cache the DataLayout since we use it a lot.
   const DataLayout &DL;
 
-  DominatorTree DT;
-
   /// The OptimizationRemarkEmitter available for this compilation.
   OptimizationRemarkEmitter *ORE;
 
@@ -1691,9 +1689,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
   // Make sure that the callsite is recursive:
   if (CandidateCall.getCaller() != &F)
     return false;
-  CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);
   // Only handle the case when the callsite has a single predecessor:
-  auto *CallBB = CallInstr->getParent();
+  auto *CallBB = CandidateCall.getParent();
   auto *Predecessor = CallBB->getSinglePredecessor();
   if (!Predecessor)
     return false;
@@ -1707,9 +1704,9 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
   bool ArgFound = false;
   Value *FuncArg = nullptr, *CallArg = nullptr;
   for (unsigned ArgNum = 0;
-       ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) {
+       ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {
     FuncArg = F.getArg(ArgNum);
-    CallArg = CallInstr->getArgOperand(ArgNum);
+    CallArg = CandidateCall.getArgOperand(ArgNum);
     if (FuncArg == CmpOp && CallArg != CmpOp) {
       ArgFound = true;
       break;
@@ -1721,8 +1718,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
   // Now we have a recursive call that is guarded by a cmp instruction.
   // Check if this cmp can be simplified:
   SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
-  CondContext CC(cast<Value>(&Cmp));
-  CC.CondIsTrue = CallBB == Br->getSuccessor(0);
+  CondContext CC(&Cmp);
+  CC.Invert = (CallBB != Br->getSuccessor(0));
   SQ.CC = &CC;
   CC.AffectedValues.insert(FuncArg);
   Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
@@ -1730,8 +1727,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
   if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
     // Make sure that the BB of the recursive call is NOT the true successor
     // of the icmp. In other words, make sure that the recursion depth is 1.
-    if ((ConstVal->isOne() && !CC.CondIsTrue) ||
-        (ConstVal->isZero() && CC.CondIsTrue)) {
+    if ((ConstVal->isOne() && CC.Invert) ||
+        (ConstVal->isZero() && !CC.Invert)) {
       SimplifiedValues[&Cmp] = ConstVal;
       return true;
     }
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
index 0323a6ee3a75a..80e43733112be 100644
--- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
 ; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
 
 ; CHECK:  Inlining calls in: test

>From 6f7b1a4db1f383911c540ecfa2eeaf1f958ecc52 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Mon, 2 Jun 2025 13:43:06 +0000
Subject: [PATCH 6/6] Rebase. Add explanation comment to the test file

---
 llvm/test/Transforms/Inline/inline-recursive-fn2.ll | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
index 80e43733112be..52d7b21902e8e 100644
--- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -1,6 +1,9 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
 
+; This test shows that the recursive function will not get simplified
+; unless the caller is the function itself, not another different caller.
+
 ; CHECK:  Inlining calls in: test
 ; CHECK:      Function size: 2
 ; CHECK:      NOT Inlining (cost=never): recursive, Call:   %call = tail call float @inline_rec_true_successor(float %x, float %scale)



More information about the llvm-commits mailing list