[llvm] c9a8e15 - [ICP] Add a few tunings to indirect-call-promotion (#149892)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 24 09:55:31 PDT 2025
Author: xur-llvm
Date: 2025-07-24T09:55:28-07:00
New Revision: c9a8e15494c84730a2a8c0713281b3c8aa5b67c4
URL: https://github.com/llvm/llvm-project/commit/c9a8e15494c84730a2a8c0713281b3c8aa5b67c4
DIFF: https://github.com/llvm/llvm-project/commit/c9a8e15494c84730a2a8c0713281b3c8aa5b67c4.diff
LOG: [ICP] Add a few tunings to indirect-call-promotion (#149892)
[ICP] Add a few tunings to indirect-call-promotion
Indirect-call promotion (ICP) has been adjusted with the following
tunings:
(1) Candidate functions can now be ICP'd even if only a declaration is
present.
(2) All non-cold candidate functions are now considered by ICP.
Previously, only hot targets were considered.
(3) If one target cannot be ICP'd, proceed with the remaining targets
instead of exiting the callsite.
This update hides all tunings under internal options and disables them
by default. They'll be enabled in a later update. There'll also be
another update to address the "not found" issue with indirect targets.
Added:
llvm/test/Transforms/PGOProfile/indirect_call_promotion2.ll
Modified:
llvm/lib/Analysis/ProfileSummaryInfo.cpp
llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
llvm/test/ThinLTO/X86/memprof-icp.ll
llvm/test/Transforms/PGOProfile/icp_mismatch_msg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index e8d4e37a4eb7e..f1c3155f2f141 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
ColdCountThreshold =
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
- assert(ColdCountThreshold <= HotCountThreshold &&
- "Cold count threshold cannot exceed hot count threshold!");
+ // When the hot and cold thresholds are identical, we would classify
+ // a count value as both hot and cold since we are doing an inclusive check
+ // (see ::is{Hot|Cold}Count()). To avoid this undesirable overlap, ensure the
+ // thresholds are distinct.
+ if (HotCountThreshold == ColdCountThreshold) {
+ if (ColdCountThreshold > 0)
+ (*ColdCountThreshold)--;
+ else
+ (*HotCountThreshold)++;
+ }
+ assert(ColdCountThreshold < HotCountThreshold &&
+ "Cold count threshold should be less than hot count threshold!");
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
HasHugeWorkingSetSize =
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 854db0ff6864c..f451c2b471aa6 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -80,6 +80,27 @@ static cl::opt<unsigned>
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
cl::desc("Skip Callsite up to this number for this compilation"));
+// ICP the candidate function even when only a declaration is present.
+static cl::opt<bool> ICPAllowDecls(
+ "icp-allow-decls", cl::init(false), cl::Hidden,
+ cl::desc("Promote the target candidate even when the defintion "
+ " is not available"));
+
+// ICP hot candidate functions only. When setting to false, non-cold functions
+// (warm functions) can also be promoted.
+static cl::opt<bool>
+ ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
+ cl::desc("Promote the target candidate only if it is a "
+ "hot function. Otherwise, warm functions can "
+ "also be promoted"));
+
+// If one target cannot be ICP'd, proceed with the remaining targets instead
+// of exiting the callsite.
+static cl::opt<bool> ICPAllowCandidateSkip(
+ "icp-allow-candidate-skip", cl::init(false), cl::Hidden,
+ cl::desc("Continue with the remaining targets instead of exiting "
+ "when failing in a candidate"));
+
// Set if the pass is called in LTO optimization. The difference for LTO mode
// is the pass won't prefix the source module name to the internal linkage
// symbols.
@@ -330,6 +351,7 @@ class IndirectCallPromoter {
struct PromotionCandidate {
Function *const TargetFunction;
const uint64_t Count;
+ const uint32_t Index;
// The following fields only exists for promotion candidates with vtable
// information.
@@ -341,7 +363,8 @@ class IndirectCallPromoter {
VTableGUIDCountsMap VTableGUIDAndCounts;
SmallVector<Constant *> AddressPoints;
- PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
+ PromotionCandidate(Function *F, uint64_t C, uint32_t I)
+ : TargetFunction(F), Count(C), Index(I) {}
};
// Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ class IndirectCallPromoter {
// Promote a list of targets for one indirect-call callsite by comparing
// indirect callee with functions. Return true if there are IR
// transformations and false otherwise.
- bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
- ArrayRef<PromotionCandidate> Candidates,
- uint64_t TotalCount,
- ArrayRef<InstrProfValueData> ICallProfDataRef,
- uint32_t NumCandidates,
- VTableGUIDCountsMap &VTableGUIDCounts);
+ bool tryToPromoteWithFuncCmp(
+ CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
// Promote a list of targets for one indirect call by comparing vtables with
// functions. Return true if there are IR transformations and false
@@ -394,12 +415,15 @@ class IndirectCallPromoter {
Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
uint64_t AddressPointOffset);
- void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
+ void updateFuncValueProfiles(CallBase &CB,
+ MutableArrayRef<InstrProfValueData> VDs,
uint64_t Sum, uint32_t MaxMDCount);
void updateVPtrValueProfiles(Instruction *VPtr,
VTableGUIDCountsMap &VTableGUIDCounts);
+ bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
+
public:
IndirectCallPromoter(
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -419,6 +443,53 @@ class IndirectCallPromoter {
} // end anonymous namespace
+bool IndirectCallPromoter::isValidTarget(uint64_t Target,
+ Function *TargetFunction,
+ const CallBase &CB, uint64_t Count) {
+ // Don't promote if the symbol is not defined in the module. This avoids
+ // creating a reference to a symbol that doesn't exist in the module
+ // This can happen when we compile with a sample profile collected from
+ // one binary but used for another, which may have profiled targets that
+ // aren't used in the new binary. We might have a declaration initially in
+ // the case where the symbol is globally dead in the binary and removed by
+ // ThinLTO.
+ using namespace ore;
+ if (TargetFunction == nullptr) {
+ LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
+ << "Cannot promote indirect call: target with md5sum "
+ << NV("target md5sum", Target)
+ << " not found (count=" << NV("Count", Count) << ")";
+ });
+ return false;
+ }
+ if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
+ LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
+ << "Do not promote indirect call: target with md5sum "
+ << NV("target md5sum", Target)
+ << " definition not available (count=" << ore::NV("Count", Count)
+ << ")";
+ });
+ return false;
+ }
+
+ const char *Reason = nullptr;
+ if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
+
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
+ << "Cannot promote indirect call to "
+ << NV("TargetFunction", TargetFunction)
+ << " (count=" << NV("Count", Count) << "): " << Reason;
+ });
+ return false;
+ }
+ return true;
+}
+
// Indirect-call promotion heuristic. The direct targets are sorted based on
// the count. Stop at the first target that is not promoted.
std::vector<IndirectCallPromoter::PromotionCandidate>
@@ -469,38 +540,15 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
break;
}
- // Don't promote if the symbol is not defined in the module. This avoids
- // creating a reference to a symbol that doesn't exist in the module
- // This can happen when we compile with a sample profile collected from
- // one binary but used for another, which may have profiled targets that
- // aren't used in the new binary. We might have a declaration initially in
- // the case where the symbol is globally dead in the binary and removed by
- // ThinLTO.
Function *TargetFunction = Symtab->getFunction(Target);
- if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
- LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
- << "Cannot promote indirect call: target with md5sum "
- << ore::NV("target md5sum", Target) << " not found";
- });
- break;
- }
-
- const char *Reason = nullptr;
- if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
- using namespace ore;
-
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
- << "Cannot promote indirect call to "
- << NV("TargetFunction", TargetFunction) << " with count of "
- << NV("Count", Count) << ": " << Reason;
- });
- break;
+ if (!isValidTarget(Target, TargetFunction, CB, Count)) {
+ if (ICPAllowCandidateSkip)
+ continue;
+ else
+ break;
}
- Ret.push_back(PromotionCandidate(TargetFunction, Count));
+ Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
TotalCount -= Count;
}
return Ret;
@@ -642,7 +690,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
// Promote indirect-call to conditional direct-call for one callsite.
bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
- uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
uint32_t NumPromoted = 0;
@@ -655,6 +703,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
NumOfPGOICallPromotion++;
NumPromoted++;
+ // Update the count and this entry will be erased later.
+ ICallProfDataRef[C.Index].Count = 0;
if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
continue;
@@ -679,21 +729,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
"Number of promoted functions should not be greater than the number "
"of values in profile metadata");
- // Update value profiles on the indirect call.
- updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
- NumCandidates);
+ updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
return true;
}
void IndirectCallPromoter::updateFuncValueProfiles(
- CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
- uint32_t MaxMDCount) {
+ CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
+ uint64_t TotalCount, uint32_t MaxMDCount) {
// First clear the existing !prof.
CB.setMetadata(LLVMContext::MD_prof, nullptr);
+
+ // Sort value profiles by count in descending order.
+ llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
+ const InstrProfValueData &RHS) {
+ return LHS.Count > RHS.Count;
+ });
+ // Drop the <target-value, count> pair if count is zero.
+ ArrayRef<InstrProfValueData> VDs(
+ CallVDs.begin(),
+ llvm::upper_bound(CallVDs, 0U,
+ [](uint64_t Count, const InstrProfValueData &ProfData) {
+ return ProfData.Count <= Count;
+ }));
+
// Annotate the remaining value profiles if counter is not zero.
if (TotalCount != 0)
- annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
+ annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
MaxMDCount);
}
@@ -726,7 +788,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
uint64_t TotalFuncCount, uint32_t NumCandidates,
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
VTableGUIDCountsMap &VTableGUIDCounts) {
- SmallVector<uint64_t, 4> PromotedFuncCount;
+ SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
for (const auto &Candidate : Candidates) {
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
@@ -771,7 +833,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
return Remark;
});
- PromotedFuncCount.push_back(Candidate.Count);
+ PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
assert(TotalFuncCount >= Candidate.Count &&
"Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +854,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
// used to load multiple virtual functions. The vtable profiles needs to be
// updated properly in that case (e.g, for each indirect call annotate both
// type profiles and function profiles in one !prof).
- for (size_t I = 0; I < PromotedFuncCount.size(); I++)
- ICallProfDataRef[I].Count -=
- std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
- // Sort value profiles by count in descending order.
- llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
- const InstrProfValueData &RHS) {
- return LHS.Count > RHS.Count;
- });
- // Drop the <target-value, count> pair if count is zero.
- ArrayRef<InstrProfValueData> VDs(
- ICallProfDataRef.begin(),
- llvm::upper_bound(ICallProfDataRef, 0U,
- [](uint64_t Count, const InstrProfValueData &ProfData) {
- return ProfData.Count <= Count;
- }));
- updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
+ for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
+ uint32_t Index = PromotedFuncCount[I].first;
+ ICallProfDataRef[Index].Count -=
+ std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
+ }
+ updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
return true;
}
@@ -822,9 +874,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
uint64_t TotalCount;
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
CB, TotalCount, NumCandidates);
- if (!NumCandidates ||
- (PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
+ if (!NumCandidates)
continue;
+ if (PSI && PSI->hasProfileSummary()) {
+ // Don't promote cold candidates.
+ if (PSI->isColdCount(TotalCount)) {
+ LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
+ << TotalCount << "\n");
+ continue;
+ }
+ // Only promote hot if ICPAllowHotOnly is true.
+ if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
+ LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
+ << TotalCount << "\n");
+ continue;
+ }
+ }
auto PromotionCandidates = getPromotionCandidatesForCallSite(
*CB, ICallProfDataRef, TotalCount, NumCandidates);
diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll
index dbc532ee52828..3a68cd89e0b16 100644
--- a/llvm/test/ThinLTO/X86/memprof-icp.ll
+++ b/llvm/test/ThinLTO/X86/memprof-icp.ll
@@ -229,6 +229,7 @@
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -import-instr-limit=0 \
; RUN: -memprof-require-definition-for-promotion \
+; RUN: -icp-allow-decls=false \
; RUN: -enable-memprof-indirect-call-support=true \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
diff --git a/llvm/test/Transforms/PGOProfile/icp_mismatch_msg.ll b/llvm/test/Transforms/PGOProfile/icp_mismatch_msg.ll
index a81fb364a6f7a..3ea196aec3086 100644
--- a/llvm/test/Transforms/PGOProfile/icp_mismatch_msg.ll
+++ b/llvm/test/Transforms/PGOProfile/icp_mismatch_msg.ll
@@ -1,8 +1,8 @@
; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks-missed=pgo-icall-prom -S 2>& 1 | FileCheck %s
-; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func4 with count of 1234: The number of arguments mismatch
-; CHECK: remark: <unknown>:0:0: Cannot promote indirect call: target with md5sum{{.*}} not found
-; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func2 with count of 7890: Return type mismatch
+; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func4 (count=1234): The number of arguments mismatch
+; CHECK: remark: <unknown>:0:0: Cannot promote indirect call: target with md5sum {{.*}} not found (count=2345)
+; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func2 (count=7890): Return type mismatch
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/PGOProfile/indirect_call_promotion2.ll b/llvm/test/Transforms/PGOProfile/indirect_call_promotion2.ll
new file mode 100644
index 0000000000000..3dfc9269b8409
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/indirect_call_promotion2.ll
@@ -0,0 +1,154 @@
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=false -icp-allow-hot-only=true -icp-allow-candidate-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=REMARK1
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=true -icp-allow-hot-only=true -icp-allow-candidate-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK2
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=false -icp-allow-hot-only=true -icp-allow-candidate-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefix=REMARK1
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=false -icp-allow-hot-only=false -icp-allow-candidate-skip=false -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=false -icp-allow-hot-only=false -icp-allow-candidate-skip=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK3
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=true -icp-allow-hot-only=false -icp-allow-candidate-skip=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK1,REMARK2,REMARK4,REMARK5
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=false -icp-allow-hot-only=false -icp-allow-candidate-skip=true -S -pass-remarks=pgo-icall-prom 2>&1 | FileCheck %s --check-prefixes=REMARK6,REMARK1,REMARK3
+; RUN: opt < %s -passes=pgo-icall-prom -icp-allow-decls=false -icp-allow-hot-only=false -icp-allow-candidate-skip=true -S | FileCheck %s --check-prefix=METADATA
+
+; REMARK6: remark: <unknown>:0:0: Promote indirect call to add with count 20000 out of 60000
+; REMARK2: remark: <unknown>:0:0: Promote indirect call to sub with count 40000 out of 60000
+; REMARK2: remark: <unknown>:0:0: Promote indirect call to add with count 20000 out of 20000
+; REMARK1: remark: <unknown>:0:0: Promote indirect call to add with count 10000 out of 10000
+; REMARK3: remark: <unknown>:0:0: Promote indirect call to add with count 200 out of 400
+; REMARK4: remark: <unknown>:0:0: Promote indirect call to sub with count 200 out of 400
+; REMARK5: remark: <unknown>:0:0: Promote indirect call to add with count 200 out of 200
+
+@math = dso_local local_unnamed_addr global ptr null, align 8
+
+define dso_local i32 @add(i32 noundef %a, i32 noundef %b) !prof !34 {
+entry:
+ %add = add nsw i32 %a, %b
+ ret i32 %add
+}
+
+define dso_local range(i32 0, 2) i32 @main() !prof !35 {
+entry:
+ call void @setup(i32 noundef 0)
+ br label %for.cond
+
+for.cond:
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %cmp = icmp samesign ult i32 %i.0, 50000
+ br i1 %cmp, label %for.body, label %for.end, !prof !36
+
+for.body:
+ %0 = load ptr, ptr @math, align 8, !tbaa !37
+ %call = call i32 %0(i32 noundef %i.0, i32 noundef %i.0), !prof !41
+; METADATA: %call = call i32 %0(i32 noundef %i.0, i32 noundef %i.0), !prof ![[NEWVP:[0-9]+]]
+; METADATA: ![[NEWVP]] = !{!"VP", i32 0, i64 40000, i64 -455885480058394486, i64 40000}
+ %add = add nsw i32 %sum.0, %call
+ %inc = add nuw nsw i32 %i.0, 1
+ br label %for.cond, !llvm.loop !42
+
+for.end:
+ call void @setup(i32 noundef 1)
+ br label %for.cond1
+
+for.cond1:
+ %i.1 = phi i32 [ 0, %for.end ], [ %inc7, %for.body3 ]
+ %sum.1 = phi i32 [ %sum.0, %for.end ], [ %add5, %for.body3 ]
+ %cmp2 = icmp samesign ult i32 %i.1, 10000
+ br i1 %cmp2, label %for.body3, label %for.cond9, !prof !44
+
+for.body3:
+ %1 = load ptr, ptr @math, align 8, !tbaa !37
+ %call4 = call i32 %1(i32 noundef %i.1, i32 noundef %i.1), !prof !45
+ %add5 = add nsw i32 %sum.1, %call4
+ %inc7 = add nuw nsw i32 %i.1, 1
+ br label %for.cond1, !llvm.loop !46
+
+for.cond9:
+ %i.2 = phi i32 [ %inc15, %for.body11 ], [ 0, %for.cond1 ]
+ %sum.2 = phi i32 [ %add13, %for.body11 ], [ %sum.1, %for.cond1 ]
+ %cmp10 = icmp samesign ult i32 %i.2, 400
+ br i1 %cmp10, label %for.body11, label %for.cond17, !prof !47
+
+for.body11:
+ call void @setup(i32 noundef %i.2)
+ %2 = load ptr, ptr @math, align 8, !tbaa !37
+ %call12 = call i32 %2(i32 noundef %i.2, i32 noundef %i.2), !prof !48
+ %add13 = add nsw i32 %sum.2, %call12
+ %inc15 = add nuw nsw i32 %i.2, 1
+ br label %for.cond9, !llvm.loop !49
+
+for.cond17:
+ %i.3 = phi i32 [ %inc25, %for.body19 ], [ 0, %for.cond9 ]
+ %sum.3 = phi i32 [ %add23, %for.body19 ], [ %sum.2, %for.cond9 ]
+ %cmp18 = icmp samesign ult i32 %i.3, 400
+ br i1 %cmp18, label %for.body19, label %for.end26, !prof !47
+
+for.body19:
+ %add.i = shl nuw nsw i32 %i.3, 1
+ %add21 = add nsw i32 %sum.3, %add.i
+ %call22 = call i32 @sub(i32 noundef %i.3, i32 noundef %i.3)
+ %add23 = add nsw i32 %add21, %call22
+ %inc25 = add nuw nsw i32 %i.3, 1
+ br label %for.cond17, !llvm.loop !50
+
+for.end26:
+ %cmp27 = icmp slt i32 %sum.3, 11
+ %. = zext i1 %cmp27 to i32
+ ret i32 %.
+}
+
+declare void @setup(i32 noundef)
+
+declare i32 @sub(i32 noundef, i32 noundef)
+
+!llvm.module.flags = !{!0, !1, !2, !3, !4}
+!llvm.ident = !{!33}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{i32 1, !"ProfileSummary", !5}
+!5 = !{!6, !7, !8, !9, !10, !11, !12, !13, !14, !15}
+!6 = !{!"ProfileFormat", !"InstrProf"}
+!7 = !{!"TotalCount", i64 122204}
+!8 = !{!"MaxCount", i64 50600}
+!9 = !{!"MaxInternalCount", i64 10000}
+!10 = !{!"MaxFunctionCount", i64 50600}
+!11 = !{!"NumCounts", i64 9}
+!12 = !{!"NumFunctions", i64 4}
+!13 = !{!"IsPartialProfile", i64 0}
+!14 = !{!"PartialProfileRatio", double 0.000000e+00}
+!15 = !{!"DetailedSummary", !16}
+!16 = !{!17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32}
+!17 = !{i32 10000, i64 50600, i32 1}
+!18 = !{i32 100000, i64 50600, i32 1}
+!19 = !{i32 200000, i64 50600, i32 1}
+!20 = !{i32 300000, i64 50600, i32 1}
+!21 = !{i32 400000, i64 50600, i32 1}
+!22 = !{i32 500000, i64 50000, i32 2}
+!23 = !{i32 600000, i64 50000, i32 2}
+!24 = !{i32 700000, i64 50000, i32 2}
+!25 = !{i32 800000, i64 50000, i32 2}
+!26 = !{i32 900000, i64 10200, i32 3}
+!27 = !{i32 950000, i64 10000, i32 4}
+!28 = !{i32 990000, i64 402, i32 5}
+!29 = !{i32 999000, i64 201, i32 8}
+!30 = !{i32 999900, i64 201, i32 8}
+!31 = !{i32 999990, i64 201, i32 8}
+!32 = !{i32 999999, i64 201, i32 8}
+!33 = !{!"clang version 22.0.0git (git@github.com:llvm/llvm-project.git ac20b28c2be26061e63dceac0915f97ece2273ac)"}
+!34 = !{!"function_entry_count", i64 10200}
+!35 = !{!"function_entry_count", i64 1}
+!36 = !{!"branch_weights", i32 50000, i32 1}
+!37 = !{!38, !38, i64 0}
+!38 = !{!"any pointer", !39, i64 0}
+!39 = !{!"omnipotent char", !40, i64 0}
+!40 = !{!"Simple C/C++ TBAA"}
+!41 = !{!"VP", i32 0, i64 60000, i64 -455885480058394486, i64 40000, i64 2232412992676883508, i64 20000}
+!42 = distinct !{!42, !43}
+!43 = !{!"llvm.loop.mustprogress"}
+!44 = !{!"branch_weights", i32 10000, i32 1}
+!45 = !{!"VP", i32 0, i64 10000, i64 2232412992676883508, i64 10000}
+!46 = distinct !{!46, !43}
+!47 = !{!"branch_weights", i32 400, i32 1}
+!48 = !{!"VP", i32 0, i64 400, i64 -455885480058394486, i64 200, i64 2232412992676883508, i64 200}
+!49 = distinct !{!49, !43}
+!50 = distinct !{!50, !43}
More information about the llvm-commits
mailing list