[llvm] [InlineCost]: Optimize inlining of recursive function. (PR #139982)
Hassnaa Hamdi via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 1 11:25:22 PDT 2025
https://github.com/hassnaaHamdi updated https://github.com/llvm/llvm-project/pull/139982
>From 4fa3cfe7875b3cfaa3f90264589e8b0e714cc274 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 14 May 2025 03:50:01 +0000
Subject: [PATCH 1/5] [ValueTracking][NFC]: Use injected condition to compute
known FPClass
---
llvm/include/llvm/Analysis/SimplifyQuery.h | 2 ++
llvm/lib/Analysis/ValueTracking.cpp | 4 ++++
2 files changed, 6 insertions(+)
diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index e8f43c8c2e91f..063ca4eaa9db0 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -62,6 +62,8 @@ struct InstrInfoQuery {
struct CondContext {
Value *Cond;
bool Invert = false;
+ // Condition is true if CxtI is in the true successor of Cond.
+ bool CondIsTrue = false;
SmallPtrSet<Value *, 4> AffectedValues;
CondContext(Value *Cond) : Cond(Cond) {}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 3d403531cea2f..e7d937a0893ab 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5014,6 +5014,10 @@ static KnownFPClass computeKnownFPClassFromContext(const Value *V,
const SimplifyQuery &Q) {
KnownFPClass KnownFromContext;
+ if (Q.CC && Q.CC->AffectedValues.contains(V))
+ computeKnownFPClassFromCond(V, Q.CC->Cond, 0, Q.CC->CondIsTrue, Q.CxtI,
+ KnownFromContext);
+
if (!Q.CxtI)
return KnownFromContext;
>From 308e479325ad01d1938f361501eea691ce8ab8b6 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Fri, 30 May 2025 16:44:40 +0000
Subject: [PATCH 2/5] Use !CondContext.Invert instead of CondIsTrue
---
llvm/include/llvm/Analysis/SimplifyQuery.h | 2 --
llvm/lib/Analysis/ValueTracking.cpp | 2 +-
2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index 063ca4eaa9db0..e8f43c8c2e91f 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -62,8 +62,6 @@ struct InstrInfoQuery {
struct CondContext {
Value *Cond;
bool Invert = false;
- // Condition is true if CxtI is in the true successor of Cond.
- bool CondIsTrue = false;
SmallPtrSet<Value *, 4> AffectedValues;
CondContext(Value *Cond) : Cond(Cond) {}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e7d937a0893ab..025a4d3cf56e2 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5015,7 +5015,7 @@ static KnownFPClass computeKnownFPClassFromContext(const Value *V,
KnownFPClass KnownFromContext;
if (Q.CC && Q.CC->AffectedValues.contains(V))
- computeKnownFPClassFromCond(V, Q.CC->Cond, 0, Q.CC->CondIsTrue, Q.CxtI,
+ computeKnownFPClassFromCond(V, Q.CC->Cond, 0, !Q.CC->Invert, Q.CxtI,
KnownFromContext);
if (!Q.CxtI)
>From 3f0cabbf2d1ef6f50aa5d8a24a5905b79792c06a Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 14 May 2025 22:15:34 +0000
Subject: [PATCH 3/5] [InlineCost][precommit]: Add test file
---
.../Transforms/Inline/inline-recursive-fn2.ll | 39 +++++++++++++++++++
1 file changed, 39 insertions(+)
create mode 100644 llvm/test/Transforms/Inline/inline-recursive-fn2.ll
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
new file mode 100644
index 0000000000000..cda08b613ddb8
--- /dev/null
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -0,0 +1,39 @@
+; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
+
+; CHECK: Inlining calls in: test
+; CHECK: Function size: 2
+; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+; CHECK: Size after inlining: 10
+
+; CHECK: Inlining calls in: inline_rec_true_successor
+; CHECK: Function size: 10
+; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+; CHECK: Size after inlining: 17
+; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
+
+
+define float @test(float %x, float %scale) noinline {
+entry:
+ %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+ ret float %call
+}
+
+define float @inline_rec_true_successor(float %x, float %scale) {
+entry:
+ %cmp = fcmp olt float %x, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+common.ret18: ; preds = %if.then, %if.end
+ %common.ret18.op = phi float [ %call_test, %if.then ], [ %mul, %if.end ]
+ ret float %common.ret18.op
+
+if.then: ; preds = %entry
+ %fneg = fneg float %x
+ %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
+ %call_test = tail call float @test(float %fneg, float %call)
+ br label %common.ret18
+
+if.end: ; preds = %entry
+ %mul = fmul float %x, %scale
+ br label %common.ret18
+}
>From 4ea732909099aec6ee78c23edcdeca9014af52bf Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Wed, 14 May 2025 22:50:26 +0000
Subject: [PATCH 4/5] [InlineCost]: Optimize inlining of recursive function.
- Consider inlining a recursive function of depth 1 only when
the caller is the function itself, rather than inlining it
at every callsite, so that we avoid redundant work.
- Use CondContext instead of a DominatorTree to reduce compile time.
---
llvm/lib/Analysis/InlineCost.cpp | 102 ++++++++----------
.../Transforms/Inline/inline-recursive-fn2.ll | 10 +-
2 files changed, 52 insertions(+), 60 deletions(-)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 8ddfa1e4eb6f7..c51bc12acc168 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1688,66 +1688,52 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
if (!isa<Argument>(Cmp.getOperand(0)) || !isa<Constant>(Cmp.getOperand(1)))
return false;
auto *CmpOp = Cmp.getOperand(0);
- Function *F = Cmp.getFunction();
- // Iterate over the users of the function to check if it's a recursive
- // function:
- for (auto *U : F->users()) {
- CallInst *Call = dyn_cast<CallInst>(U);
- if (!Call || Call->getFunction() != F || Call->getCalledFunction() != F)
- continue;
- auto *CallBB = Call->getParent();
- auto *Predecessor = CallBB->getSinglePredecessor();
- // Only handle the case when the callsite has a single predecessor:
- if (!Predecessor)
- continue;
+ // Make sure that the callsite is recursive:
+ if (CandidateCall.getCaller() != &F)
+ return false;
+ CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);
+ // Only handle the case when the callsite has a single predecessor:
+ auto *CallBB = CallInstr->getParent();
+ auto *Predecessor = CallBB->getSinglePredecessor();
+ if (!Predecessor)
+ return false;
+ // Check if the callsite is guarded by the same Cmp instruction:
+ auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
+ if (!Br || Br->isUnconditional() || Br->getCondition() != &Cmp)
+ return false;
- auto *Br = dyn_cast<BranchInst>(Predecessor->getTerminator());
- if (!Br || Br->isUnconditional())
- continue;
- // Check if the Br condition is the same Cmp instr we are investigating:
- if (Br->getCondition() != &Cmp)
- continue;
- // Check if there are any arg of the recursive callsite is affecting the cmp
- // instr:
- bool ArgFound = false;
- Value *FuncArg = nullptr, *CallArg = nullptr;
- for (unsigned ArgNum = 0;
- ArgNum < F->arg_size() && ArgNum < Call->arg_size(); ArgNum++) {
- FuncArg = F->getArg(ArgNum);
- CallArg = Call->getArgOperand(ArgNum);
- if (FuncArg == CmpOp && CallArg != CmpOp) {
- ArgFound = true;
- break;
- }
- }
- if (!ArgFound)
- continue;
- // Now we have a recursive call that is guarded by a cmp instruction.
- // Check if this cmp can be simplified:
- SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
- DomConditionCache DC;
- DC.registerBranch(Br);
- SQ.DC = &DC;
- if (DT.root_size() == 0) {
- // Dominator tree was never constructed for any function yet.
- DT.recalculate(*F);
- } else if (DT.getRoot()->getParent() != F) {
- // Dominator tree was constructed for a different function, recalculate
- // it for the current function.
- DT.recalculate(*F);
+ // Check if there is any arg of the recursive callsite is affecting the cmp
+ // instr:
+ bool ArgFound = false;
+ Value *FuncArg = nullptr, *CallArg = nullptr;
+ for (unsigned ArgNum = 0;
+ ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) {
+ FuncArg = F.getArg(ArgNum);
+ CallArg = CallInstr->getArgOperand(ArgNum);
+ if (FuncArg == CmpOp && CallArg != CmpOp) {
+ ArgFound = true;
+ break;
}
- SQ.DT = &DT;
- Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
- cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
- if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
- bool IsTrueSuccessor = CallBB == Br->getSuccessor(0);
- // Make sure that the BB of the recursive call is NOT the next successor
- // of the icmp. In other words, make sure that the recursion depth is 1.
- if ((ConstVal->isOne() && !IsTrueSuccessor) ||
- (ConstVal->isZero() && IsTrueSuccessor)) {
- SimplifiedValues[&Cmp] = ConstVal;
- return true;
- }
+ }
+ if (!ArgFound)
+ return false;
+
+ // Now we have a recursive call that is guarded by a cmp instruction.
+ // Check if this cmp can be simplified:
+ SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
+ CondContext CC(cast<Value>(&Cmp));
+ CC.CondIsTrue = CallBB == Br->getSuccessor(0);
+ SQ.CC = &CC;
+ CC.AffectedValues.insert(FuncArg);
+ Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
+ cast<CmpInst>(&Cmp), {CallArg, Cmp.getOperand(1)}, SQ);
+ if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
+ // Make sure that the BB of the recursive call is NOT the true successor
+ // of the icmp. In other words, make sure that the recursion depth is 1.
+ if ((ConstVal->isOne() && !CC.CondIsTrue) ||
+ (ConstVal->isZero() && CC.CondIsTrue)) {
+ SimplifiedValues[&Cmp] = ConstVal;
+ return true;
}
}
return false;
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
index cda08b613ddb8..0323a6ee3a75a 100644
--- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -2,15 +2,21 @@
; CHECK: Inlining calls in: test
; CHECK: Function size: 2
-; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
-; CHECK: Size after inlining: 10
+; CHECK: NOT Inlining (cost=never): recursive, Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
; CHECK: Inlining calls in: inline_rec_true_successor
; CHECK: Function size: 10
; CHECK: Inlining (cost=-35, threshold=337), Call: %call = tail call float @inline_rec_true_successor(float %fneg, float %scale)
; CHECK: Size after inlining: 17
; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test = tail call float @test(float %fneg, float %common.ret18.op.i)
+; CHECK: NOT Inlining (cost=never): noinline function attribute, Call: %call_test.i = tail call float @test(float %x, float %call.i)
+; CHECK: Skipping inlining due to history: inline_rec_true_successor -> inline_rec_true_successor
+; CHECK: Updated inlining SCC: (test, inline_rec_true_successor)
+; CHECK: Inlining calls in: test
+; CHECK: Function size: 2
+; CHECK: Inlining (cost=25, threshold=225), Call: %call = tail call float @inline_rec_true_successor(float %x, float %scale)
+; CHECK: Size after inlining: 10
define float @test(float %x, float %scale) noinline {
entry:
>From fe616fbde53fba9eb7612e29558f6e2c91c96ef9 Mon Sep 17 00:00:00 2001
From: Hassnaa Hamdi <hassnaa.hamdi at arm.com>
Date: Sun, 1 Jun 2025 18:17:25 +0000
Subject: [PATCH 5/5] resolve comments
---
llvm/lib/Analysis/InlineCost.cpp | 17 +++++++----------
.../Transforms/Inline/inline-recursive-fn2.ll | 1 +
2 files changed, 8 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index c51bc12acc168..7bd1f18004580 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -263,8 +263,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
// Cache the DataLayout since we use it a lot.
const DataLayout &DL;
- DominatorTree DT;
-
/// The OptimizationRemarkEmitter available for this compilation.
OptimizationRemarkEmitter *ORE;
@@ -1691,9 +1689,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
// Make sure that the callsite is recursive:
if (CandidateCall.getCaller() != &F)
return false;
- CallInst *CallInstr = dyn_cast<CallInst>(&CandidateCall);
// Only handle the case when the callsite has a single predecessor:
- auto *CallBB = CallInstr->getParent();
+ auto *CallBB = CandidateCall.getParent();
auto *Predecessor = CallBB->getSinglePredecessor();
if (!Predecessor)
return false;
@@ -1707,9 +1704,9 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
bool ArgFound = false;
Value *FuncArg = nullptr, *CallArg = nullptr;
for (unsigned ArgNum = 0;
- ArgNum < F.arg_size() && ArgNum < CallInstr->arg_size(); ArgNum++) {
+ ArgNum < F.arg_size() && ArgNum < CandidateCall.arg_size(); ArgNum++) {
FuncArg = F.getArg(ArgNum);
- CallArg = CallInstr->getArgOperand(ArgNum);
+ CallArg = CandidateCall.getArgOperand(ArgNum);
if (FuncArg == CmpOp && CallArg != CmpOp) {
ArgFound = true;
break;
@@ -1721,8 +1718,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
// Now we have a recursive call that is guarded by a cmp instruction.
// Check if this cmp can be simplified:
SimplifyQuery SQ(DL, dyn_cast<Instruction>(CallArg));
- CondContext CC(cast<Value>(&Cmp));
- CC.CondIsTrue = CallBB == Br->getSuccessor(0);
+ CondContext CC(&Cmp);
+ CC.Invert = (CallBB != Br->getSuccessor(0));
SQ.CC = &CC;
CC.AffectedValues.insert(FuncArg);
Value *SimplifiedInstruction = llvm::simplifyInstructionWithOperands(
@@ -1730,8 +1727,8 @@ bool CallAnalyzer::simplifyCmpInstForRecCall(CmpInst &Cmp) {
if (auto *ConstVal = dyn_cast_or_null<ConstantInt>(SimplifiedInstruction)) {
// Make sure that the BB of the recursive call is NOT the true successor
// of the icmp. In other words, make sure that the recursion depth is 1.
- if ((ConstVal->isOne() && !CC.CondIsTrue) ||
- (ConstVal->isZero() && CC.CondIsTrue)) {
+ if ((ConstVal->isOne() && CC.Invert) ||
+ (ConstVal->isZero() && !CC.Invert)) {
SimplifiedValues[&Cmp] = ConstVal;
return true;
}
diff --git a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
index 0323a6ee3a75a..80e43733112be 100644
--- a/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
+++ b/llvm/test/Transforms/Inline/inline-recursive-fn2.ll
@@ -1,3 +1,4 @@
+; REQUIRES: asserts
; RUN: opt -passes='cgscc(inline),instcombine,cgscc(inline)' -S -debug-only=inline -disable-output < %s 2>&1 | FileCheck %s
; CHECK: Inlining calls in: test
More information about the llvm-commits
mailing list