[llvm] 2d64185 - [nfc][PGO]Factor out profile scaling into a standalone helper function (#83780)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 11:57:10 PDT 2024


Author: Mingming Liu
Date: 2024-03-27T11:57:07-07:00
New Revision: 2d641858fa44db315a42fa1b5ba43540f1ca1ea4

URL: https://github.com/llvm/llvm-project/commit/2d641858fa44db315a42fa1b5ba43540f1ca1ea4
DIFF: https://github.com/llvm/llvm-project/commit/2d641858fa44db315a42fa1b5ba43540f1ca1ea4.diff

LOG: [nfc][PGO]Factor out profile scaling into a standalone helper function (#83780)

- Put the helper function in `ProfDataUtil.h/cpp`, which is already a
dependency of `Instructions.cpp`
- The helper function could be re-used to update profiles of
`InvokeInst` (in a follow-up pull request)

Added: 
    llvm/test/Transforms/Inline/update_invoke_prof.ll
    llvm/test/Transforms/Inline/update_value_profile.ll

Modified: 
    llvm/include/llvm/IR/ProfDataUtils.h
    llvm/lib/IR/Instructions.cpp
    llvm/lib/IR/ProfDataUtils.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index 255fa2ff1c7906..c0897408986fb3 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
 /// a `prof` metadata reference to instruction `I`.
 void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);
 
+/// Scaling the profile data attached to 'I' using the ratio of S/T.
+void scaleProfData(Instruction &I, uint64_t S, uint64_t T);
+
 } // namespace llvm
 #endif

diff  --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index c2abe875e0196e..cec02e2ce6338b 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -833,15 +833,6 @@ CallInst *CallInst::Create(CallInst *CI, ArrayRef<OperandBundleDef> OpB,
 // of S/T. The meaning of "branch_weights" meta data for call instruction is
 // transfered to represent call count.
 void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
-  auto *ProfileData = getMetadata(LLVMContext::MD_prof);
-  if (ProfileData == nullptr)
-    return;
-
-  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
-  if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
-                        !ProfDataName->getString().equals("VP")))
-    return;
-
   if (T == 0) {
     LLVM_DEBUG(dbgs() << "Attempting to update profile weights will result in "
                          "div by 0. Ignoring. Likely the function "
@@ -850,42 +841,7 @@ void CallInst::updateProfWeight(uint64_t S, uint64_t T) {
                          "with non-zero prof info.");
     return;
   }
-
-  MDBuilder MDB(getContext());
-  SmallVector<Metadata *, 3> Vals;
-  Vals.push_back(ProfileData->getOperand(0));
-  APInt APS(128, S), APT(128, T);
-  if (ProfDataName->getString().equals("branch_weights") &&
-      ProfileData->getNumOperands() > 0) {
-    // Using APInt::div may be expensive, but most cases should fit 64 bits.
-    APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
-                       ->getValue()
-                       .getZExtValue());
-    Val *= APS;
-    Vals.push_back(MDB.createConstant(
-        ConstantInt::get(Type::getInt32Ty(getContext()),
-                         Val.udiv(APT).getLimitedValue(UINT32_MAX))));
-  } else if (ProfDataName->getString().equals("VP"))
-    for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
-      // The first value is the key of the value profile, which will not change.
-      Vals.push_back(ProfileData->getOperand(i));
-      uint64_t Count =
-          mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
-              ->getValue()
-              .getZExtValue();
-      // Don't scale the magic number.
-      if (Count == NOMORE_ICP_MAGICNUM) {
-        Vals.push_back(ProfileData->getOperand(i + 1));
-        continue;
-      }
-      // Using APInt::div may be expensive, but most cases should fit 64 bits.
-      APInt Val(128, Count);
-      Val *= APS;
-      Vals.push_back(MDB.createConstant(
-          ConstantInt::get(Type::getInt64Ty(getContext()),
-                           Val.udiv(APT).getLimitedValue())));
-    }
-  setMetadata(LLVMContext::MD_prof, MDNode::get(getContext(), Vals));
+  scaleProfData(*this, S, T);
 }
 
 //===----------------------------------------------------------------------===//

diff  --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index b1a10d0ce5a522..dc86f4204b1a1d 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -190,4 +190,52 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
   I.setMetadata(LLVMContext::MD_prof, BranchWeights);
 }
 
+void scaleProfData(Instruction &I, uint64_t S, uint64_t T) {
+  assert(T != 0 && "Caller should guarantee");
+  auto *ProfileData = I.getMetadata(LLVMContext::MD_prof);
+  if (ProfileData == nullptr)
+    return;
+
+  auto *ProfDataName = dyn_cast<MDString>(ProfileData->getOperand(0));
+  if (!ProfDataName || (!ProfDataName->getString().equals("branch_weights") &&
+                        !ProfDataName->getString().equals("VP")))
+    return;
+
+  LLVMContext &C = I.getContext();
+
+  MDBuilder MDB(C);
+  SmallVector<Metadata *, 3> Vals;
+  Vals.push_back(ProfileData->getOperand(0));
+  APInt APS(128, S), APT(128, T);
+  if (ProfDataName->getString().equals("branch_weights") &&
+      ProfileData->getNumOperands() > 0) {
+    // Using APInt::div may be expensive, but most cases should fit 64 bits.
+    APInt Val(128, mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1))
+                       ->getValue()
+                       .getZExtValue());
+    Val *= APS;
+    Vals.push_back(MDB.createConstant(ConstantInt::get(
+        Type::getInt32Ty(C), Val.udiv(APT).getLimitedValue(UINT32_MAX))));
+  } else if (ProfDataName->getString().equals("VP"))
+    for (unsigned i = 1; i < ProfileData->getNumOperands(); i += 2) {
+      // The first value is the key of the value profile, which will not change.
+      Vals.push_back(ProfileData->getOperand(i));
+      uint64_t Count =
+          mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(i + 1))
+              ->getValue()
+              .getZExtValue();
+      // Don't scale the magic number.
+      if (Count == NOMORE_ICP_MAGICNUM) {
+        Vals.push_back(ProfileData->getOperand(i + 1));
+        continue;
+      }
+      // Using APInt::div may be expensive, but most cases should fit 64 bits.
+      APInt Val(128, Count);
+      Val *= APS;
+      Vals.push_back(MDB.createConstant(ConstantInt::get(
+          Type::getInt64Ty(C), Val.udiv(APT).getLimitedValue())));
+    }
+  I.setMetadata(LLVMContext::MD_prof, MDNode::get(C, Vals));
+}
+
 } // namespace llvm

diff  --git a/llvm/test/Transforms/Inline/update_invoke_prof.ll b/llvm/test/Transforms/Inline/update_invoke_prof.ll
new file mode 100644
index 00000000000000..5f09c7cf8fe095
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_invoke_prof.ll
@@ -0,0 +1,64 @@
+; A pre-commit test to show that branch weights and value profiles associated with invoke are not updated.
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+
+define void @caller(ptr %func) personality ptr @__gxx_personality_v0 !prof !15 {
+  call void @callee(ptr %func), !prof !16
+  ret void
+}
+
+declare void @inner_callee(ptr %func)
+
+define void @callee(ptr %func) personality ptr @__gxx_personality_v0 !prof !17 {
+  invoke void %func()
+          to label %next unwind label %lpad, !prof !18
+
+next:
+  invoke void @inner_callee(ptr %func)
+          to label %ret unwind label %lpad, !prof !19
+
+lpad:
+  %exn = landingpad {ptr, i32}
+          cleanup
+  unreachable
+
+ret:
+  ret void
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 2000}
+!8 = !{!"NumCounts", i64 2}
+!9 = !{!"NumFunctions", i64 2}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!15 = !{!"function_entry_count", i64 1000}
+!16 = !{!"branch_weights", i64 1000}
+!17 = !{!"function_entry_count", i32 1500}
+!18 = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
+!19 = !{!"branch_weights", i32 1500}
+
+; CHECK-LABEL: @caller(
+; CHECK:  invoke void %func(
+; CHECK-NEXT: {{.*}} !prof ![[PROF1:[0-9]+]]
+; CHECK:  invoke void @inner_callee(
+; CHECK-NEXT: {{.*}} !prof ![[PROF2:[0-9]+]]
+
+; CHECK-LABL: @callee(
+; CHECK:  invoke void %func(
+; CHECK-NEXT: {{.*}} !prof ![[PROF1]] 
+; CHECK:  invoke void @inner_callee(
+; CHECK-NEXT: {{.*}} !prof ![[PROF2]]
+
+; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 1500, i64 123, i64 900, i64 456, i64 600}
+; CHECK: ![[PROF2]] = !{!"branch_weights", i32 1500}

diff  --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll
new file mode 100644
index 00000000000000..daa95e93b68ec5
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_value_profile.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; When 'callee' is inlined into caller1 and caller2, the indirect call value
+; profiles of the inlined copy should be scaled based on callers' profiles,
+; and the indirect call value profiles in 'callee' should be updated.
+define i32 @callee(ptr %0, i32 %1) !prof !20 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %3 = load ptr, ptr %0
+  %5 = getelementptr inbounds i8, ptr %3, i64 8
+  %6 = load ptr, ptr %5
+  %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
+  ret i32 %7
+}
+
+define i32 @caller1(i32 %0) !prof !18 {
+; CHECK-LABEL: define i32 @caller1(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF2:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF3:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+define i32 @caller2(i32 %0) !prof !19  {
+; CHECK-LABEL: define i32 @caller2(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF4:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+declare ptr @_Z10createTypei(i32)
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+!17 = !{!"VP", i32 0, i64 1600, i64 123, i64 1000, i64 456, i64 600}
+!18 = !{!"function_entry_count", i64 1000}
+!19 = !{!"function_entry_count", i64 600}
+!20 = !{!"function_entry_count", i64 1700}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
+; CHECK: [[PROF1]] = !{!"VP", i32 0, i64 94, i64 123, i64 58, i64 456, i64 35}
+; CHECK: [[PROF2]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF3]] = !{!"VP", i32 0, i64 941, i64 123, i64 588, i64 456, i64 352}
+; CHECK: [[PROF4]] = !{!"function_entry_count", i64 600}
+; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 564, i64 123, i64 352, i64 456, i64 211}
+;.


        


More information about the llvm-commits mailing list