[llvm] [Inline][PGO] After inline, update profile for invoke instruction in both cloned instruction in the caller and original callee (PR #83809)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 28 16:20:05 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/83809

>From e7c6220a4f2c42c94fa33fd8c61da569ef67d4db Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 4 Mar 2024 00:04:11 -0800
Subject: [PATCH 1/7] [nfc][PGO]Factor out profile scaling into a standalone
 function

---
 llvm/include/llvm/IR/ProfDataUtils.h |  3 ++
 llvm/lib/IR/Instructions.cpp         | 46 +-------------------------
 llvm/lib/IR/ProfDataUtils.cpp        | 48 ++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 45 deletions(-)

diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index 255fa2ff1c7906..c0897408986fb3 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
 /// a `prof` metadata reference to instruction `I`.
 void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);
 
+/// Scale the profile data attached to 'I' using the ratio of S/T.
+void scaleProfData(Instruction &I, uint64_t S, uint64_t T);
+
 } // namespace llvm
 #endif
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 42cdcad78228f6..9ae71acd523c36 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -825,15 +825,6 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
 // of S/T. The meaning of "branch_weights" meta data for call instruction is
 // transfered to represent call count.
 void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
-  auto *ProfileData = getMetadata(LLVMContext::MD_prof);
-  if (ProfileData == nullptr)
-    return;
-
-  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
-  if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
-                        !ProfDataName->getString().equals("VP")))
-    return;
-
   if (T == 0) {
     LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
                          "div by 0. Ignoring. Likely the function "
@@ -842,42 +833,7 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
                          "with non-zero prof info.");
     return;
   }
-
-  MDBuilder MDB(getContext());
-  SmallVector<Metadata *, 3> Vals;
-  Vals.push_back(ProfileData->getOperand(0));
-  APInt APS(128, S), APT(128, T);
-  if (ProfDataName->getString().equals("branch_weights") &&
-      ProfileData->getNumOperands() > 0) {
-    // Using APInt::div may be expensive, but most cases should fit 64 bits.
-    APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
-                       ->getValue()
-                       .getZExtValue());
-    Val *= APS;
-    Vals.push_back(MDB.createConstant(
-        ConstantInt::get(Type::getInt32Ty(getContext()),
-                         Val.udiv(APT).getLimitedValue(UINT32_MAX))));
-  } else if (ProfDataName->getString().equals("VP"))
-    for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
-      // The first value is the key of the value profile, which will not change.
-      Vals.push_back(ProfileData->getOperand(i));
-      uint64_t Count =
-          mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
-              ->getValue()
-              .getZExtValue();
-      // Don't scale the magic number.
-      if (Count == NOMORE_ICP_MAGICNUM) {
-        Vals.push_back(ProfileData->getOperand(i + 1));
-        continue;
-      }
-      // Using APInt::div may be expensive, but most cases should fit 64 bits.
-      APInt Val(128, Count);
-      Val *= APS;
-      Vals.push_back(MDB.createConstant(
-          ConstantInt::get(Type::getInt64Ty(getContext()),
-                           Val.udiv(APT).getLimitedValue())));
-    }
-  setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
+  scaleProfData(*this, S, T);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index b1a10d0ce5a522..dc86f4204b1a1d 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -190,4 +190,52 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
   I.setMetadata(LLVMContext::MD_prof, BranchWeights);
 }
 
+void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
+  assert(T != 0 && "Caller should guarantee");
+  auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
+  if (ProfileData == nullptr)
+    return;
+
+  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+  if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
+                        !ProfDataName->getString().equals("VP")))
+    return;
+
+  LLVMContext &C = I.getContext();
+
+  MDBuilder MDB(C);
+  SmallVector<Metadata *, 3> Vals;
+  Vals.push_back(ProfileData->getOperand(0));
+  APInt APS(128, S), APT(128, T);
+  if (ProfDataName->getString().equals("branch_weights") &&
+      ProfileData->getNumOperands() > 0) {
+    // Using APInt::div may be expensive, but most cases should fit 64 bits.
+    APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
+                       ->getValue()
+                       .getZExtValue());
+    Val *= APS;
+    Vals.push_back(MDB.createConstant(ConstantInt::get(
+        Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX))));
+  } else if (ProfDataName->getString().equals("VP"))
+    for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
+      // The first value is the key of the value profile, which will not change.
+      Vals.push_back(ProfileData->getOperand(i));
+      uint64_t Count =
+          mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
+              ->getValue()
+              .getZExtValue();
+      // Don't scale the magic number.
+      if (Count == NOMORE_ICP_MAGICNUM) {
+        Vals.push_back(ProfileData->getOperand(i + 1));
+        continue;
+      }
+      // Using APInt::div may be expensive, but most cases should fit 64 bits.
+      APInt Val(128, Count);
+      Val *= APS;
+      Vals.push_back(MDB.createConstant(ConstantInt::get(
+          Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue())));
+    }
+  I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
+}
+
 } // namespace llvm

>From 919f9a2f291fcaf71adeaa07d797cbfdc119518b Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 4 Mar 2024 00:53:03 -0800
Subject: [PATCH 2/7] add a regression test

---
 .../Transforms/Inline/update_value_profile.ll | 82 +++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/update_value_profile.ll

diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll
new file mode 100644
index 00000000000000..7fa8c28f89f7ce
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_value_profile.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; When 'callee' is inlined into caller1 and caller2, the indirect call value
+; profiles of the inlined copy should be scaled based on callers' profiles,
+; and the indirect call value profiles in 'callee' should be updated.
+define i32 @callee(ptr %0, i32 %1) !prof !20 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %3 = load ptr, ptr %0
+  %5 = getelementptr inbounds i8, ptr %3, i64 8
+  %6 = load ptr, ptr %5
+  %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
+  ret i32 %7
+}
+
+define i32 @caller1(i32 %0) !prof !18 {
+; CHECK-LABEL: define i32 @caller1(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+define i32 @caller2(i32 %0) !prof !19  {
+; CHECK-LABEL: define i32 @caller2(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+declare ptr @_Z10createTypei(i32)
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!17 = !{!"VP", i32 0, i64 1600, i64 15186643663281959480, i64 1000, i64 15101948577241817854, i64 600}
+!18 = !{!"function_entry_count", i64 1000}
+!19 = !{!"function_entry_count", i64 600}
+!20 = !{!"function_entry_count", i64 1700}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
+; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 -3260100410427592136, i64 58, i64 -3344795496467733762, i64 35}
+; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 -3260100410427592136, i64 588, i64 -3344795496467733762, i64 352}
+; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600}
+; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 -3260100410427592136, i64 352, i64 -3344795496467733762, i64 211}
+;.

>From 9575b83ea40012ecbfbf301a24ec89de0726ffd4 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Mon, 4 Mar 2024 00:43:55 -0800
Subject: [PATCH 3/7] update profile for invoke instruction in caller and
 callee after inline

---
 llvm/include/llvm/IR/Instructions.h           |   3 +
 llvm/lib/IR/Instructions.cpp                  |  12 ++
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  11 +-
 .../Inline/update_invoke_value_profile.ll     | 185 ++++++++++++++++++
 4 files changed, 209 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/update_invoke_value_profile.ll

diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index bc357074e5cb21..1146b3fa3ae244 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -4360,6 +4360,9 @@ class InvokeInst : public CallBase {
 
   unsigned getNumSuccessors() const { return 2; }
 
+  /// Updates profile metadata by scaling it by \p S / \p T.
+  void updateProfWeight(uint64_t S, uint64_t T);
+
   // Methods for support type inquiry through isa, cast, and dyn_cast:
   static bool classof(const Instruction *I) {
     return (I->getOpcode() == Instruction::Invoke);
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 9ae71acd523c36..920ce67f118991 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -918,6 +918,18 @@ LandingPadInst *InvokeInst::getLandingPadInst() const {
   return cast<LandingPadInst>(getUnwindDest()->getFirstNonPHI());
 }
 
+void InvokeInst::updateProfWeight(uint64_t S, uint64_t T) {
+  if (T == 0) {
+    LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
+                         "div by 0. Ignoring. Likely the function "
+                      << getParent()->getParent()->getName()
+                      << " has 0 entry count, and contains call instructions "
+                         "with non-zero prof info.");
+    return;
+  }
+  scaleProfData(*this, S, T);
+}
+
 //===----------------------------------------------------------------------===//
 //                        CallBrInst Implementation
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index f68fdb26f28173..75b0d0669e9228 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1909,10 +1909,14 @@ void llvm::updateProfileCallee(
   // During inlining ?
   if (VMap) {
     uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
-    for (auto Entry : *VMap)
+    for (auto Entry : *VMap) {
       if (isa<CallInst>(Entry.first))
         if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
           CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+      if (isa<InvokeInst>(Entry.first))
+        if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second))
+          II->updateProfWeight(CloneEntryCount, PriorEntryCount);
+    }
   }
 
   if (EntryDelta) {
@@ -1921,9 +1925,12 @@ void llvm::updateProfileCallee(
     for (BasicBlock &BB : *Callee)
       // No need to update the callsite if it is pruned during inlining.
       if (!VMap || VMap->count(&BB))
-        for (Instruction &I : BB)
+        for (Instruction &I : BB) {
           if (CallInst *CI = dyn_cast<CallInst>(&I))
             CI->updateProfWeight(NewEntryCount, PriorEntryCount);
+          if (InvokeInst *II = dyn_cast<InvokeInst>(&I))
+            II->updateProfWeight(NewEntryCount, PriorEntryCount);
+        }
   }
 }
 
diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
new file mode 100644
index 00000000000000..ac5597a41fce61
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=1000 -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%class.Error = type { i32 }
+ at _ZTI5Error = external constant { ptr, ptr }
+
+define i32 @callee(ptr %b) personality ptr @__gxx_personality_v0 !prof !17 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; CHECK-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VTABLE]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = invoke i32 [[TMP0]](ptr [[B]])
+; CHECK-NEXT:            to label [[TRY_CONT:%.*]] unwind label [[LPAD:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       lpad:
+; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:            catch ptr @_ZTI5Error
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
+; CHECK-NEXT:    [[MATCHES:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[MATCHES]], label [[CATCH:%.*]], label [[EHCLEANUP:%.*]]
+; CHECK:       catch:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[E]], align 4
+; CHECK-NEXT:    [[CALL3:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E]])
+; CHECK-NEXT:            to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1:%.*]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    br label [[TRY_CONT]]
+; CHECK:       try.cont:
+; CHECK-NEXT:    [[RET_0:%.*]] = phi i32 [ [[CALL3]], [[INVOKE_CONT2]] ], [ [[CALL]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret i32 [[RET_0]]
+; CHECK:       lpad1:
+; CHECK-NEXT:    [[TMP7:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    invoke void @__cxa_end_catch()
+; CHECK-NEXT:            to label [[INVOKE_CONT4:%.*]] unwind label [[TERMINATE_LPAD:%.*]]
+; CHECK:       invoke.cont4:
+; CHECK-NEXT:    br label [[EHCLEANUP]]
+; CHECK:       ehcleanup:
+; CHECK-NEXT:    [[LPAD_VAL7_MERGED:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4]] ], [ [[TMP1]], [[LPAD]] ]
+; CHECK-NEXT:    resume { ptr, i32 } [[LPAD_VAL7_MERGED]]
+; CHECK:       terminate.lpad:
+; CHECK-NEXT:    [[TMP8:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            catch ptr null
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %e = alloca %class.Error
+  %vtable = load ptr, ptr %b
+  %0 = load ptr, ptr %vtable
+  %call = invoke i32 %0(ptr %b)
+  to label %try.cont unwind label %lpad, !prof !15
+
+lpad:
+  %1 = landingpad { ptr, i32 }
+  cleanup
+  catch ptr @_ZTI5Error
+  %2 = extractvalue { ptr, i32 } %1, 1
+  %3 = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
+  %matches = icmp eq i32 %2, %3
+  br i1 %matches, label %catch, label %ehcleanup
+
+catch:
+  %4 = extractvalue { ptr, i32 } %1, 0
+  %5 = tail call ptr @__cxa_begin_catch(ptr %4)
+  %6 = load i32, ptr %5
+  store i32 %6, ptr %e
+  %call3 = invoke i32 @_ZN5Error10error_codeEv(ptr %e)
+  to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+  br label %try.cont
+
+try.cont:
+  %ret.0 = phi i32 [ %call3, %invoke.cont2 ], [ %call, %entry ]
+  ret i32 %ret.0
+
+lpad1:
+  %7 = landingpad { ptr, i32 }
+  cleanup
+  invoke void @__cxa_end_catch()
+  to label %invoke.cont4 unwind label %terminate.lpad
+
+invoke.cont4:
+  br label %ehcleanup
+
+ehcleanup:
+  %lpad.val7.merged = phi { ptr, i32 } [ %7, %invoke.cont4 ], [ %1, %lpad ]
+  resume { ptr, i32 } %lpad.val7.merged
+
+terminate.lpad:
+  %8 = landingpad { ptr, i32 }
+  catch ptr null
+  unreachable
+}
+
+define i32 @caller(ptr %b) !prof !16 {
+; CHECK-LABEL: define i32 @caller(
+; CHECK-SAME: ptr [[B:%.*]]) personality ptr @__gxx_personality_v0 !prof [[PROF2:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_I:%.*]] = alloca [[CLASS_ERROR:%.*]], align 8
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[E_I]])
+; CHECK-NEXT:    [[VTABLE_I:%.*]] = load ptr, ptr [[B]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VTABLE_I]], align 8
+; CHECK-NEXT:    [[CALL_I:%.*]] = invoke i32 [[TMP0]](ptr [[B]])
+; CHECK-NEXT:            to label [[CALLEE_EXIT:%.*]] unwind label [[LPAD_I:%.*]], !prof [[PROF3:![0-9]+]]
+; CHECK:       lpad.i:
+; CHECK-NEXT:    [[TMP1:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:            catch ptr @_ZTI5Error
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.eh.typeid.for(ptr @_ZTI5Error)
+; CHECK-NEXT:    [[MATCHES_I:%.*]] = icmp eq i32 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br i1 [[MATCHES_I]], label [[CATCH_I:%.*]], label [[EHCLEANUP_I:%.*]]
+; CHECK:       catch.i:
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP1]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = tail call ptr @__cxa_begin_catch(ptr [[TMP4]])
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[E_I]], align 4
+; CHECK-NEXT:    [[CALL3_I:%.*]] = invoke i32 @_ZN5Error10error_codeEv(ptr [[E_I]])
+; CHECK-NEXT:            to label [[INVOKE_CONT2_I:%.*]] unwind label [[LPAD1_I:%.*]]
+; CHECK:       invoke.cont2.i:
+; CHECK-NEXT:    br label [[CALLEE_EXIT]]
+; CHECK:       lpad1.i:
+; CHECK-NEXT:    [[TMP7:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            cleanup
+; CHECK-NEXT:    invoke void @__cxa_end_catch()
+; CHECK-NEXT:            to label [[INVOKE_CONT4_I:%.*]] unwind label [[TERMINATE_LPAD_I:%.*]]
+; CHECK:       invoke.cont4.i:
+; CHECK-NEXT:    br label [[EHCLEANUP_I]]
+; CHECK:       ehcleanup.i:
+; CHECK-NEXT:    [[LPAD_VAL7_MERGED_I:%.*]] = phi { ptr, i32 } [ [[TMP7]], [[INVOKE_CONT4_I]] ], [ [[TMP1]], [[LPAD_I]] ]
+; CHECK-NEXT:    resume { ptr, i32 } [[LPAD_VAL7_MERGED_I]]
+; CHECK:       terminate.lpad.i:
+; CHECK-NEXT:    [[TMP8:%.*]] = landingpad { ptr, i32 }
+; CHECK-NEXT:            catch ptr null
+; CHECK-NEXT:    unreachable
+; CHECK:       callee.exit:
+; CHECK-NEXT:    [[RET_0_I:%.*]] = phi i32 [ [[CALL3_I]], [[INVOKE_CONT2_I]] ], [ [[CALL_I]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[E_I]])
+; CHECK-NEXT:    ret i32 [[RET_0_I]]
+;
+entry:
+  %call = tail call i32 @callee(ptr %b)
+  ret i32 %call
+}
+
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(ptr)
+declare ptr @__cxa_begin_catch(ptr)
+declare i32 @_ZN5Error10error_codeEv(ptr)
+declare void @__cxa_end_catch()
+
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"VP", i32 0, i64 1500, i64 9261744921105590125, i64 1500}
+!16 = !{!"function_entry_count", i64 1000}
+!17 = !{!"function_entry_count", i64 1500}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 500}
+; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 500, i64 -9184999152603961491, i64 500}
+; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 1000, i64 -9184999152603961491, i64 1000}
+;.

>From 15776ef870ec715e657bd147c9432f20033db050 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 26 Mar 2024 14:49:19 -0700
Subject: [PATCH 4/7] add two pre-commit tests, one for invoke with branch
 weights, the other for invoke with value profiles

---
 .../Transforms/Inline/update_invoke_counts.ll | 54 +++++++++++++++++++
 .../Inline/update_invoke_value_profile.ll     | 54 +++++++++++++++++++
 2 files changed, 108 insertions(+)
 create mode 100644 llvm/test/Transforms/Inline/update_invoke_counts.ll
 create mode 100644 llvm/test/Transforms/Inline/update_invoke_value_profile.ll

diff --git a/llvm/test/Transforms/Inline/update_invoke_counts.ll b/llvm/test/Transforms/Inline/update_invoke_counts.ll
new file mode 100644
index 00000000000000..bc0677f0bf0091
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_invoke_counts.ll
@@ -0,0 +1,54 @@
+; A pre-commit test to show that branch weight associated with inovke are not updated.
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
+  call void @callee(ptr %func), !prof !16
+  ret void
+}
+
+declare void @inner_callee(ptr %func)
+
+define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
+  invoke void @inner_callee(ptr %func)
+          to label %ret unwind label %lpad, !prof !18
+
+ret:
+  ret void
+
+lpad:
+  %exn = landingpad {ptr, i32}
+          cleanup
+  unreachable
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"branch_weights", i32 1000}
+!17 = !{!"function_entry_count", i32 1500}
+!18 = !{!"branch_weights", i32 1500}
+
+; CHECK-LABEL: @caller(
+; CHECK:  invoke void @inner_callee(
+; CHECK-NEXT: {{.*}} !prof ![[PROF:[0-9]+]]
+
+; CHECK-LABEL: @callee(
+; CHECK:  invoke void @inner_callee(
+; CHECK-NEXT: {{.*}} !prof ![[PROF]]
+
+; CHECK: ![[PROF]] = !{!"branch_weights", i32 1500}
diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
new file mode 100644
index 00000000000000..156a1262c9621d
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
@@ -0,0 +1,54 @@
+; A pre-commit test to show that value profiles associated with inovke are not updated.
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
+  call void @callee(ptr %func), !prof !16
+  ret void
+}
+
+declare void @inner_callee(ptr %func)
+
+define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
+  invoke void %func()
+          to label %ret unwind label %lpad, !prof !18
+
+ret:
+  ret void
+
+lpad:
+  %exn = landingpad {ptr, i32}
+          cleanup
+  unreachable
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"VP", i64 1000, i64 123, i64 600, i64 456, i64 400}
+!17 = !{!"function_entry_count", i32 1500}
+!18 = !{!"VP", i64 1500, i64 123, i64 900, i64 456, i64 600}
+
+; CHECK-LABEL: @caller(
+; CHECK:  invoke void %func(
+; CHECK-NEXT: {{.*}} !prof ![[PROF:[0-9]+]]
+
+; CHECK-LABEL: @callee(
+; CHECK:  invoke void %func(
+; CHECK-NEXT: {{.*}} !prof ![[PROF]] 
+
+; CHECK: ![[PROF]] = !{!"VP", i64 1500, i64 123, i64 900, i64 456, i64 600}

>From 8555e5aba24fd7f48b332b982b4a9238a38d7bfb Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 26 Mar 2024 15:21:33 -0700
Subject: [PATCH 5/7] fix mistakes in the test case

---
 llvm/test/Transforms/Inline/update_invoke_counts.ll        | 2 +-
 llvm/test/Transforms/Inline/update_invoke_value_profile.ll | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/Inline/update_invoke_counts.ll b/llvm/test/Transforms/Inline/update_invoke_counts.ll
index bc0677f0bf0091..1785c4d9858f2d 100644
--- a/llvm/test/Transforms/Inline/update_invoke_counts.ll
+++ b/llvm/test/Transforms/Inline/update_invoke_counts.ll
@@ -1,4 +1,4 @@
-; A pre-commit test to show that branch weight associated with inovke are not updated.
+; A pre-commit test to show that branch weights associated with invoke are not updated.
 ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
 
 declare i32 @__gxx_personality_v0(...)
diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
index 156a1262c9621d..e58ac0770fc975 100644
--- a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
+++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
@@ -1,4 +1,4 @@
-; A pre-commit test to show that value profiles associated with inovke are not updated.
+; A pre-commit test to show that value profiles associated with invoke are not updated.
 ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
 
 declare i32 @__gxx_personality_v0(...)
@@ -39,7 +39,7 @@ lpad:
 !13 = !{i32 999000, i64 100, i32 1}
 !14 = !{i32 999999, i64 1, i32 2}
 !15 = !{!"function_entry_count", i64 1000}
-!16 = !{!"VP", i64 1000, i64 123, i64 600, i64 456, i64 400}
+!16 = !{!"branch_weights", i64 1000}
 !17 = !{!"function_entry_count", i32 1500}
 !18 = !{!"VP", i64 1500, i64 123, i64 900, i64 456, i64 600}
 

>From a8aef1b69a90cbad791a5de6dc622aa314460201 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 26 Mar 2024 15:40:46 -0700
Subject: [PATCH 6/7] fix test case

---
 llvm/test/Transforms/Inline/update_invoke_value_profile.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
index e58ac0770fc975..a8a261ccd204c4 100644
--- a/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
+++ b/llvm/test/Transforms/Inline/update_invoke_value_profile.ll
@@ -41,7 +41,7 @@ lpad:
 !15 = !{!"function_entry_count", i64 1000}
 !16 = !{!"branch_weights", i64 1000}
 !17 = !{!"function_entry_count", i32 1500}
-!18 = !{!"VP", i64 1500, i64 123, i64 900, i64 456, i64 600}
+!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
 
 ; CHECK-LABEL: @caller(
 ; CHECK:  invoke void %func(
@@ -51,4 +51,4 @@ lpad:
 ; CHECK:  invoke void %func(
 ; CHECK-NEXT: {{.*}} !prof ![[PROF]] 
 
-; CHECK: ![[PROF]] = !{!"VP", i64 1500, i64 123, i64 900, i64 456, i64 600}
+; CHECK: ![[PROF]] = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}

>From fa02c56b2eec1a547ed320efaf4204bfd5febdf3 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 28 Mar 2024 16:19:30 -0700
Subject: [PATCH 7/7] Do not scale taken/not-taken branch probabilities for
 invoke instructions, and add regression test

---
 llvm/lib/IR/ProfDataUtils.cpp                 | 13 ++++++--
 .../Transforms/Inline/update_invoke_prof.ll   | 33 ++++++++++++++-----
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index dc86f4204b1a1d..a80aabd105b79d 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -201,14 +201,23 @@ void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
                         !ProfDataName->getString().equals("VP")))
     return;
 
+  // If an instruction is a call and its "branch_weights" node has more than two
+  // operands (the name plus two or more weights), the weights are taken vs
+  // not-taken branch probabilities rather than a call count; don't scale them.
+  if (isa<CallBase>(&I) && ProfDataName->getString().equals("branch_weights") &&
+      ProfileData->getNumOperands() > 2)
+    return;
+
   LLVMContext &C = I.getContext();
 
   MDBuilder MDB(C);
   SmallVector<Metadata *, 3> Vals;
   Vals.push_back(ProfileData->getOperand(0));
   APInt APS(128, S), APT(128, T);
-  if (ProfDataName->getString().equals("branch_weights") &&
-      ProfileData->getNumOperands() > 0) {
+
+  // Scale the counts associated with calls.
+  if (isa<CallBase>(&I) && ProfDataName->getString().equals("branch_weights") &&
+      ProfileData->getNumOperands() == 2) {
     // Using APInt::div may be expensive, but most cases should fit 64 bits.
     APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
                        ->getValue()
diff --git a/llvm/test/Transforms/Inline/update_invoke_prof.ll b/llvm/test/Transforms/Inline/update_invoke_prof.ll
index 443785e852a8dc..f6b86dfe5bb1b0 100644
--- a/llvm/test/Transforms/Inline/update_invoke_prof.ll
+++ b/llvm/test/Transforms/Inline/update_invoke_prof.ll
@@ -1,23 +1,31 @@
 ; Test that branch weights and value profiles associated with invoke are updated
-; in both caller and callee after inline.
+; in both caller and callee after inline, but invoke instructions with taken or
+; not taken branch probabilities are not updated.
 ; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
 
 declare i32 @__gxx_personality_v0(...)
 
 define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
   call void @callee(ptr %func), !prof !16
+
   ret void
 }
 
-declare void @inner_callee(ptr %func)
+declare void @callee1(ptr %func)
+
+declare void @callee2(ptr %func)
 
 define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
   invoke void %func()
           to label %next unwind label %lpad, !prof !18
 
 next:
-  invoke void @inner_callee(ptr %func)
-          to label %ret unwind label %lpad, !prof !19
+  invoke void @callee1(ptr %func)
+          to label %cont unwind label %lpad, !prof !19
+
+cont:
+  invoke void @callee2(ptr %func)
+          to label %ret unwind label %lpad, !prof !20
 
 lpad:
   %exn = landingpad {ptr, i32}
@@ -48,20 +56,27 @@ ret:
 !17 = !{!"function_entry_count", i32 1500}
 !18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
 !19 = !{!"branch_weights", i32 1500}
+!20 = !{!"branch_weights", i32 1234, i32 5678}
 
 ; CHECK-LABEL: @caller(
 ; CHECK:  invoke void %func(
 ; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]
-; CHECK:  invoke void @inner_callee(
+; CHECK:  invoke void @callee1(
 ; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]]
+; CHECK:  invoke void @callee2(
+; CHECK-NEXT: {{.*}} !prof ![[PROF3:[0-9]+]]
 
 ; CHECK-LABL: @callee(
 ; CHECK:  invoke void %func(
-; CHECK-NEXT: {{.*}} !prof ![[PROF3:[0-9]+]]
-; CHECK:  invoke void @inner_callee(
 ; CHECK-NEXT: {{.*}} !prof ![[PROF4:[0-9]+]]
+; CHECK:  invoke void @callee1(
+; CHECK-NEXT: {{.*}} !prof ![[PROF5:[0-9]+]]
+; CHECK:  invoke void @callee2(
+; CHECK-NEXT: {{.*}} !prof ![[PROF3]]
+
 
 ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1000, i64 123, i64 600, i64 456, i64 400}
 ; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1000}
-; CHECK: ![[PROF3]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200}
-; CHECK: ![[PROF4]] = !{!"branch_weights", i32 500}
+; CHECK: ![[PROF3]] = !{!"branch_weights", i32 1234, i32 5678}
+; CHECK: ![[PROF4]] = !{!"VP", i32 0, i64 500, i64 123, i64 300, i64 456, i64 200}
+; CHECK: ![[PROF5]] = !{!"branch_weights", i32 500}



More information about the llvm-commits mailing list