[llvm-branch-commits] [llvm] [Inline]Update value profile for non-call instructions (PR #83769)

Mingming Liu via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Mar 3 22:17:08 PST 2024


https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/83769

None

>From 04a2bca6ee0fbea6a9dc84f59e8bf4a41f8ae230 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Sun, 3 Mar 2024 22:16:03 -0800
Subject: [PATCH] [Inline]Update value profile for non-call instructions

---
 llvm/include/llvm/IR/ProfDataUtils.h          |  3 +
 llvm/lib/IR/ProfDataUtils.cpp                 | 32 +++++++
 llvm/lib/Transforms/Utils/InlineFunction.cpp  | 26 +++++-
 .../Transforms/Inline/update_value_profile.ll | 89 +++++++++++++++++++
 4 files changed, 147 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/Inline/update_value_profile.ll

diff --git a/llvm/include/llvm/IR/ProfDataUtils.h b/llvm/include/llvm/IR/ProfDataUtils.h
index 255fa2ff1c7906..2010c4bc1e8b34 100644
--- a/llvm/include/llvm/IR/ProfDataUtils.h
+++ b/llvm/include/llvm/IR/ProfDataUtils.h
@@ -108,5 +108,8 @@ bool extractProfTotalWeight(const Instruction &I, uint64_t &TotalWeights);
 /// a `prof` metadata reference to instruction `I`.
 void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights);
 
+/// Returns value profile metadata \p ProfData with counts scaled by S/T.
+MDNode *scaleValueProfile(const MDNode *ProfData, uint64_t S, uint64_t T);
+
 } // namespace llvm
 #endif
diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp
index dcb057c1b25fd8..db91a66bf493ec 100644
--- a/llvm/lib/IR/ProfDataUtils.cpp
+++ b/llvm/lib/IR/ProfDataUtils.cpp
@@ -190,4 +190,36 @@ void setBranchWeights(Instruction &I, ArrayRef<uint32_t> Weights) {
   I.setMetadata(LLVMContext::MD_prof, BranchWeights);
 }
 
+// Builds a "VP" node from \p ProfData in which every count is scaled by S/T.
+// Returns nullptr for null input; counts are copied unscaled when T is zero.
+MDNode *scaleValueProfile(const MDNode *ProfData, uint64_t S, uint64_t T) {
+  if (ProfData == nullptr)
+    return nullptr;
+  assert(cast<MDString>(ProfData->getOperand(0))->getString() == "VP" &&
+         "Expects value profile metadata");
+  assert(ProfData->getNumOperands() % 2 == 1 &&
+         "Expects the tag followed by whole operand pairs");
+  LLVMContext &C = ProfData->getContext();
+  MDBuilder MDB(C);
+  // Use 128 bits so Count * S cannot wrap; the result is clamped to 64 bits.
+  APInt APS(128, S), APT(128, T);
+
+  SmallVector<Metadata *, 3> Vals;
+  Vals.push_back(ProfData->getOperand(0));
+  for (unsigned i = 1; i < ProfData->getNumOperands(); i += 2) {
+    Vals.push_back(ProfData->getOperand(i));
+    const MDOperand &CountOp = ProfData->getOperand(i + 1);
+    uint64_t Count = mdconst::extract<ConstantInt>(CountOp)->getZExtValue();
+    // Don't scale the magic number, and never divide by a zero T.
+    if (Count == NOMORE_ICP_MAGICNUM || T == 0) {
+      Vals.push_back(CountOp);
+      continue;
+    }
+    APInt Scaled = (APInt(128, Count) * APS).udiv(APT);
+    Vals.push_back(MDB.createConstant(
+        ConstantInt::get(Type::getInt64Ty(C), Scaled.getLimitedValue())));
+  }
+  return MDNode::get(C, Vals);
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index d4d4bf5ebdf36e..7cc1641a207aef 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/ObjCARCAnalysisUtils.h"
@@ -30,8 +31,8 @@
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/Analysis/VectorUtils.h"
-#include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
@@ -55,6 +56,7 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -1910,9 +1912,18 @@ void llvm::updateProfileCallee(
   if (VMap) {
     uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;
     for (auto Entry : *VMap)
+      // FIXME: Update the profiles for invoke instruction after inline
       if (isa<CallInst>(Entry.first))
-        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second))
+        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {
           CI->updateProfWeight(CloneEntryCount, PriorEntryCount);
+          Instruction *VPtr =
+              PGOIndirectCallVisitor::tryGetVTableInstruction(CI);
+          if (VPtr)
+            VPtr->setMetadata(
+                LLVMContext::MD_prof,
+                scaleValueProfile(VPtr->getMetadata(LLVMContext::MD_prof),
+                                  CloneEntryCount, PriorEntryCount));
+        }
   }
 
   if (EntryDelta) {
@@ -1922,8 +1933,17 @@ void llvm::updateProfileCallee(
       // No need to update the callsite if it is pruned during inlining.
       if (!VMap || VMap->count(&BB))
         for (Instruction &I : BB)
-          if (CallInst *CI = dyn_cast<CallInst>(&I))
+          // FIXME: Update the profiles for invoke instruction after inline
+          if (CallInst *CI = dyn_cast<CallInst>(&I)) {
             CI->updateProfWeight(NewEntryCount, PriorEntryCount);
+            Instruction *VPtr =
+                PGOIndirectCallVisitor::tryGetVTableInstruction(CI);
+            if (VPtr)
+              VPtr->setMetadata(
+                  LLVMContext::MD_prof,
+                  scaleValueProfile(VPtr->getMetadata(LLVMContext::MD_prof),
+                                    NewEntryCount, PriorEntryCount));
+          }
   }
 }
 
diff --git a/llvm/test/Transforms/Inline/update_value_profile.ll b/llvm/test/Transforms/Inline/update_value_profile.ll
new file mode 100644
index 00000000000000..ae59a2d40a6c23
--- /dev/null
+++ b/llvm/test/Transforms/Inline/update_value_profile.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+;@_ZTV4Base = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base3keyEv, ptr @_ZN4Base4funcEi] }
+;@_ZTV7Derived = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @_ZN4Base3keyEv, ptr @_ZN7Derived4funcEi] }
+
+define i32 @callee(ptr %0, i32 %1) !prof !20 {
+; CHECK-LABEL: define i32 @callee(
+; CHECK-SAME: ptr [[TMP0:%.*]], i32 [[TMP1:%.*]]) !prof [[PROF0:![0-9]+]] {
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP0]], i32 [[TMP1]]), !prof [[PROF2:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %3 = load ptr, ptr %0, !prof !21
+  %5 = getelementptr inbounds i8, ptr %3, i64 8
+  %6 = load ptr, ptr %5
+  %7 = tail call i32 %6(ptr %0, i32 %1), !prof !17
+  ret i32 %7
+}
+
+define i32 @caller1(i32 %0) !prof !18 {
+; CHECK-LABEL: define i32 @caller1(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF3:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF4:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF5:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+define i32 @caller2(i32 %0) !prof !19  {
+; CHECK-LABEL: define i32 @caller2(
+; CHECK-SAME: i32 [[TMP0:%.*]]) !prof [[PROF6:![0-9]+]] {
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call ptr @_Z10createTypei(i32 [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !prof [[PROF7:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i64 8
+; CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 [[TMP5]](ptr [[TMP2]], i32 [[TMP0]]), !prof [[PROF8:![0-9]+]]
+; CHECK-NEXT:    ret i32 [[TMP6]]
+;
+  %2 = tail call ptr @_Z10createTypei(i32 %0)
+  %3 = tail call i32 @callee(ptr %2, i32 %0)
+  ret i32 %3
+}
+
+declare ptr @_Z10createTypei(i32)
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 10}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+;!15 = !{i64 16, !"_ZTS4Base"}
+;!16= !{i64 16, !"_ZTS7Derived"}
+!17 = !{!"VP", i32 0, i64 1600, i64 15186643663281959480, i64 1000, i64 15101948577241817854, i64 600}
+!18 = !{!"function_entry_count", i64 1000}
+!19 = !{!"function_entry_count", i64 600}
+!20 = !{!"function_entry_count", i64 1700}
+!21 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1000, i64 13870436605473471591, i64 600}
+
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i64 100}
+; CHECK: [[PROF1]] = !{!"VP", i32 2, i64 94, i64 1960855528937986108, i64 58, i64 -4576307468236080025, i64 35}
+; CHECK: [[PROF2]] = !{!"VP", i32 0, i64 94, i64 -3260100410427592136, i64 58, i64 -3344795496467733762, i64 35}
+; CHECK: [[PROF3]] = !{!"function_entry_count", i64 1000}
+; CHECK: [[PROF4]] = !{!"VP", i32 2, i64 941, i64 1960855528937986108, i64 588, i64 -4576307468236080025, i64 352}
+; CHECK: [[PROF5]] = !{!"VP", i32 0, i64 941, i64 -3260100410427592136, i64 588, i64 -3344795496467733762, i64 352}
+; CHECK: [[PROF6]] = !{!"function_entry_count", i64 600}
+; CHECK: [[PROF7]] = !{!"VP", i32 2, i64 564, i64 1960855528937986108, i64 352, i64 -4576307468236080025, i64 211}
+; CHECK: [[PROF8]] = !{!"VP", i32 0, i64 564, i64 -3260100410427592136, i64 352, i64 -3344795496467733762, i64 211}
+;.



More information about the llvm-branch-commits mailing list