[llvm-branch-commits] [llvm] [Inline][PGO] After inline, update profile for invoke instruction in both cloned instruction in the caller and original callee (PR #83809)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Mar 4 15:40:18 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
Author: Mingming Liu (minglotus-6)
<details>
<summary>Changes</summary>
A related change is https://reviews.llvm.org/D133121, which correctly preserves both branch weights and value profiles for invoke instruction.
* The parent nfc commit is https://github.com/llvm/llvm-project/pull/83780
---
Full diff: https://github.com/llvm/llvm-project/pull/83809.diff
4 Files Affected:
- (modified) llvm/include/llvm/IR/Instructions.h (+3)
- (modified) llvm/lib/IR/Instructions.cpp (+12)
- (modified) llvm/lib/Transforms/Utils/InlineFunction.cpp (+9-2)
- (added) llvm/test/Transforms/Inline/update_invoke_value_profile.ll (+185)
``````````diff
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index bc357074e5cb21..1146b3fa3ae244 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -4360,6 +4360,9 @@ class InvokeInst : public CallBase {
unsigned getNumSuccessors() const { return 2; }
+ /// Updates profile metadata by scaling it by \p S / \p T.
+ void updateProfWeight(uint64_t S, uint64_t T);
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return (I->getOpcode() == Instruction::Invoke);
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 9ae71acd523c36..920ce67f118991 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -918,6 +918,18 @@ LandingPadInst *InvokeInst::getLandingPadInst() const {
return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
}
+void InvokeInst::updateProfWeight(uint64_t S, uint64_t T) {
+ if (T == 0) {
+ LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
+ "div by 0. Ignoring. Likely the function "
+ << getParent()->getParent()->getName()
+ << " has 0 entry count, and contains call instructions "
+ "with non-zero prof info.");
+ return;
+ }
+ scaleProfData(*this, S, T);
+}
+
//===----------------------------------------------------------------------===//
// CallBrInst Implementation
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f68fdb26f28173..75b0d0669e9228 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1909,10 +1909,14 @@ void llvm::updateProfileCallee(
// During inlining ?
if (VMap) {
uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
- for (auto Entry : *VMap)
+ for (auto Entry : *VMap) {
if (isa<CallInst>(Entry.first))
if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ if (isa<InvokeInst>(Entry.first))
+ if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second))
+ II->updateProfWeight(CloneEntryCount, PriorEntryCount);
+ }
}
if (EntryDelta) {
@@ -1921,9 +1925,12 @@ void llvm::updateProfileCallee(
for (BasicBlock &BB : *Callee)
// No need to update the callsite if it is pruned during inlining.
if (!VMap || VMap->count(&BB))
- for (Instruction &I : BB)
+ for (Instruction &I : BB) {
if (CallInst *CI = dyn_cast<CallInst>(&I))
CI->updateProfWeight(NewEntryCount, PriorEntryCount);
+ if (InvokeInst *II = dyn_cast<InvokeInst>(&I))
+ II->updateProfWeight(NewEntryCount, PriorEntryCount);
+ }
}
}
diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
new file mode 100644
index 00000000000000..ac5597a41fce61
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=1000 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.Error = type { i32 }
+ at _ZTI5Error = external constant { ptr, ptr }
+
+define i32 @callee(ptr %b) personality ptr @__gxx_personality_v0 !prof !17 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; CHECK-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT: [[CALL:%.*]] = invoke i32 [[TMP0]](ptr [[B]])
+; CHECK-NEXT: to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: lpad:
+; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch ptr @_ZTI5Error
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
+; CHECK-NEXT: [[MATCHES:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
+; CHECK: catch:
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT: store i32 [[TMP6]], ptr [[E]], align 4
+; CHECK-NEXT: [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E]])
+; CHECK-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
+; CHECK: invoke.cont2:
+; CHECK-NEXT: br label [[TRY_CONT]]
+; CHECK: try.cont:
+; CHECK-NEXT: [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[CALL]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: ret i32 [[RET_0]]
+; CHECK: lpad1:
+; CHECK-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: invoke void @__cxa_end_catch()
+; CHECK-NEXT: to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
+; CHECK: invoke.cont4:
+; CHECK-NEXT: br label [[EHCLEANUP]]
+; CHECK: ehcleanup:
+; CHECK-NEXT: [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4]] ], [ [[TMP1]], [[LPAD]] ]
+; CHECK-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
+; CHECK: terminate.lpad:
+; CHECK-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: catch ptr null
+; CHECK-NEXT: unreachable
+;
+entry:
+ %e = alloca %class.Error
+ %vtable = load ptr, ptr %b
+ %0 = load ptr, ptr %vtable
+ %call = invoke i32 %0(ptr %b)
+ to label %try.cont unwind label %lpad, !prof !15
+
+lpad:
+ %1 = landingpad { ptr, i32 }
+ cleanup
+ catch ptr @_ZTI5Error
+ %2 = extractvalue { ptr, i32 } %1, 1
+ %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
+ %matches = icmp eq i32 %2, %3
+ br i1 %matches, label %catch, label %ehcleanup
+
+catch:
+ %4 = extractvalue { ptr, i32 } %1, 0
+ %5 = tail call ptr @__cxa_begin_catch(ptr %4)
+ %6 = load i32, ptr %5
+ store i32 %6, ptr %e
+ %call3 = invoke i32 @_ZN5Error10error_codeEv(ptr %e)
+ to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+ br label %try.cont
+
+try.cont:
+ %ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ]
+ ret i32 %ret.0
+
+lpad1:
+ %7 = landingpad { ptr, i32 }
+ cleanup
+ invoke void @__cxa_end_catch()
+ to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:
+ br label %ehcleanup
+
+ehcleanup:
+ %lpad.val7.merged = phi { ptr, i32 } [ %7, %invoke.cont4 ], [ %1, %lpad ]
+ resume { ptr, i32 } %lpad.val7.merged
+
+terminate.lpad:
+ %8 = landingpad { ptr, i32 }
+ catch ptr null
+ unreachable
+}
+
+define i32 @caller(ptr %b) !prof !16 {
+; CHECK-LABEL: define i32 @caller(
+; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF2:![0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[E_I:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[E_I]])
+; CHECK-NEXT: [[VTABLE_I:%.*]] = load ptr, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VTABLE_I]], align 8
+; CHECK-NEXT: [[CALL_I:%.*]] = invoke i32 [[TMP0]](ptr [[B]])
+; CHECK-NEXT: to label [[CALLEE_EXIT:%.*]] unwind label [[LPAD_I:%.*]], !prof [[PROF3:![0-9]+]]
+; CHECK: lpad.i:
+; CHECK-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: catch ptr @_ZTI5Error
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
+; CHECK-NEXT: [[MATCHES_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br i1 [[MATCHES_I]], label [[CATCH_I:%.*]], label [[EHCLEANUP_I:%.*]]
+; CHECK: catch.i:
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT: store i32 [[TMP6]], ptr [[E_I]], align 4
+; CHECK-NEXT: [[CALL3_I:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E_I]])
+; CHECK-NEXT: to label [[INVOKE_CONT2_I:%.*]] unwind label [[LPAD1_I:%.*]]
+; CHECK: invoke.cont2.i:
+; CHECK-NEXT: br label [[CALLEE_EXIT]]
+; CHECK: lpad1.i:
+; CHECK-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: cleanup
+; CHECK-NEXT: invoke void @__cxa_end_catch()
+; CHECK-NEXT: to label [[INVOKE_CONT4_I:%.*]] unwind label [[TERMINATE_LPAD_I:%.*]]
+; CHECK: invoke.cont4.i:
+; CHECK-NEXT: br label [[EHCLEANUP_I]]
+; CHECK: ehcleanup.i:
+; CHECK-NEXT: [[LPAD_VAL7_MERGED_I:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4_I]] ], [ [[TMP1]], [[LPAD_I]] ]
+; CHECK-NEXT: resume { ptr, i32 } [[LPAD_VAL7_MERGED_I]]
+; CHECK: terminate.lpad.i:
+; CHECK-NEXT: [[TMP8:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT: catch ptr null
+; CHECK-NEXT: unreachable
+; CHECK: callee.exit:
+; CHECK-NEXT: [[RET_0_I:%.*]] = phi i32 [ [[CALL3_I]], [[INVOKE_CONT2_I]] ], [ [[CALL_I]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[E_I]])
+; CHECK-NEXT: ret i32 [[RET_0_I]]
+;
+entry:
+ %call = tail call i32 @callee(ptr %b)
+ ret i32 %call
+}
+
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(ptr)
+declare ptr @__cxa_begin_catch(ptr)
+declare i32 @_ZN5Error10error_codeEv(ptr)
+declare void @__cxa_end_catch()
+
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"VP", i32 0, i64 1500, i64 9261744921105590125, i64 1500}
+!16 = !{!"function_entry_count", i64 1000}
+!17 = !{!"function_entry_count", i64 1500}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 500}
+; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 500, i64 -9184999152603961491, i64 500}
+; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 1000, i64 -9184999152603961491, i64 1000}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/83809
More information about the llvm-branch-commits
mailing list