[compiler-rt] [llvm] [TypeProf][InstrFDO]Implement more efficient comparison sequence for indirect-call-promotion with vtable profiles. (PR #81442)

via llvm-commits llvm-commits at lists.llvm.org
Tue May 28 08:53:47 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-analysis

Author: Mingming Liu (minglotus-6)

<details>
<summary>Changes</summary>

Clang's `-fwhole-program-vtables` is required for this optimization to take place. If `-fwhole-program-vtables` is not enabled, this change is a no-op.
    
* Function-comparison (before):

```
%vtable = load ptr, ptr %obj
%vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
%func = load ptr, ptr %vfn
%cond = icmp eq ptr %func, @<!-- -->callee
br i1 %cond, label %bb1, label %bb2

bb1:
   call @<!-- -->callee

bb2:
   call %func
```

* VTable-comparison (after):

```
%vtable = load ptr, ptr %obj
%cond = icmp eq ptr %vtable, @<!-- -->vtable-address-point
br i1 %cond, label %bb1, label %bb2

bb1:
   call @<!-- -->callee

bb2:
  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
  %func = load ptr, ptr %vfn
  call %func
```
    
Key changes:
1. Find out virtual calls and the vtables they come from.
    - ICP relies on the type intrinsics `llvm.type.test` and `llvm.public.type.test` to find virtual calls and their
        compatible vtables, and relies on type metadata to find the address point for comparison.
2. The ICP pass does a cost-benefit analysis and compares vtables only when the number of vtables for a function candidate is within an (option-specified) threshold.
3. Sink the function-addressing and vtable-load instructions to the indirect fallback.
     - The sink helper functions are simplified versions of
         `InstCombinerImpl::tryToSinkInstruction`.
     - The helper functions to handle debug intrinsics are copied from
         `InstCombinerImpl::tryToSinkInstructionDbgValues` and
         `InstCombinerImpl::tryToSinkInstructionDbgVariableRecords` into
         Transforms/Utils/Local.cpp. Ideally only one copy should exist
         for inst-combine, icp and other passes.
4. Keep value profiles updated
     1) Update vtable value profiles after inlining
     2) For either function-based comparison or vtable-based comparison,
          update both vtable and indirect call value profiles.



---

Patch is 83.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/81442.diff


13 Files Affected:

- (modified) compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp (+60-44) 
- (modified) llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h (+1-1) 
- (modified) llvm/include/llvm/Analysis/IndirectCallVisitor.h (+3) 
- (modified) llvm/include/llvm/Transforms/Utils/Local.h (+9) 
- (modified) llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp (+3-3) 
- (modified) llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp (+595-32) 
- (modified) llvm/lib/Transforms/Utils/InlineFunction.cpp (+31-5) 
- (modified) llvm/lib/Transforms/Utils/Local.cpp (+184) 
- (modified) llvm/test/Transforms/Inline/update_invoke_prof.ll (+46-28) 
- (modified) llvm/test/Transforms/Inline/update_value_profile.ll (+29-25) 
- (added) llvm/test/Transforms/PGOProfile/icp_vtable_cmp.ll (+139) 
- (added) llvm/test/Transforms/PGOProfile/icp_vtable_invoke.ll (+127) 
- (added) llvm/test/Transforms/PGOProfile/icp_vtable_tail_call.ll (+67) 


``````````diff
diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
index e51805bdf923c..73921adcc0c15 100644
--- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
+++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp
@@ -5,59 +5,61 @@
 // ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported
 // UNSUPPORTED: ppc && host-byteorder-big-endian
 
-// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o %t-test
-// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test
+// RUN: rm -rf %t && mkdir %t && cd %t
+
+// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -fprofile-generate=. -mllvm -enable-vtable-value-profiling %s -o test
+// RUN: env LLVM_PROFILE_FILE=test.profraw ./test
 
 // Show vtable profiles from raw profile.
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profraw | FileCheck %s --check-prefixes=COMMON,RAW
 
 // Generate indexed profile from raw profile and show the data.
-// RUN: llvm-profdata merge %t-test.profraw -o %t-test.profdata
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
+// RUN: llvm-profdata merge test.profraw -o test.profdata
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
 
 // Generate text profile from raw and indexed profiles respectively and show the data.
-// RUN: llvm-profdata merge --text %t-test.profraw -o %t-raw.proftext
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT
-// RUN: llvm-profdata merge --text %t-test.profdata -o %t-indexed.proftext
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT
+// RUN: llvm-profdata merge --text test.profraw -o raw.proftext
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text raw.proftext | FileCheck %s --check-prefix=ICTEXT
+// RUN: llvm-profdata merge --text test.profdata -o indexed.proftext
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text indexed.proftext | FileCheck %s --check-prefix=ICTEXT
 
 // Generate indexed profile from text profiles and show the data
-// RUN: llvm-profdata merge --binary %t-raw.proftext -o %t-text.profraw
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
-// RUN: llvm-profdata merge --binary %t-indexed.proftext -o %t-text.profdata
-// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
+// RUN: llvm-profdata merge --binary raw.proftext -o text.profraw
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED
+// RUN: llvm-profdata merge --binary indexed.proftext -o text.profdata
+// RUN: llvm-profdata show --function=main --ic-targets --show-vtables text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED
 
 // COMMON: Counters:
 // COMMON-NEXT:  main:
-// COMMON-NEXT:  Hash: 0x0f9a16fe6d398548
-// COMMON-NEXT:  Counters: 2
+// COMMON-NEXT:  Hash: 0x068617320ec408a0
+// COMMON-NEXT:  Counters: 4
 // COMMON-NEXT:  Indirect Call Site Count: 2
 // COMMON-NEXT:  Number of instrumented vtables: 2
 // RAW:  Indirect Target Results:
-// RAW-NEXT:       [  0, _ZN8Derived15func1Eii,        250 ] (25.00%)
-// RAW-NEXT:       [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii,        750 ] (75.00%)
-// RAW-NEXT:       [  1, _ZN8Derived15func2Eii,        250 ] (25.00%)
-// RAW-NEXT:       [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii,        750 ] (75.00%)
+// RAW-NEXT:       [  0, _ZN8Derived14funcEii,        50 ] (25.00%)
+// RAW-NEXT:       [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii,        150 ] (75.00%)
+// RAW-NEXT:       [  1, _ZN8Derived1D0Ev,        250 ] (25.00%)
+// RAW-NEXT:       [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev,        750 ] (75.00%)
 // RAW-NEXT:  VTable Results:
-// RAW-NEXT:       [  0, _ZTV8Derived1,        250 ] (25.00%)
-// RAW-NEXT:       [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
+// RAW-NEXT:       [  0, _ZTV8Derived1,        50 ] (25.00%)
+// RAW-NEXT:       [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        150 ] (75.00%)
 // RAW-NEXT:       [  1, _ZTV8Derived1,        250 ] (25.00%)
 // RAW-NEXT:       [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
 // INDEXED:     Indirect Target Results:
-// INDEXED-NEXT:         [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii,        750 ] (75.00%)
-// INDEXED-NEXT:         [  0, _ZN8Derived15func1Eii,        250 ] (25.00%)
-// INDEXED-NEXT:         [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii,        750 ] (75.00%)
-// INDEXED-NEXT:         [  1, _ZN8Derived15func2Eii,        250 ] (25.00%)
+// INDEXED-NEXT:         [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii,        150 ] (75.00%)
+// INDEXED-NEXT:         [  0, _ZN8Derived14funcEii,        50 ] (25.00%)
+// INDEXED-NEXT:         [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev,        750 ] (75.00%)
+// INDEXED-NEXT:         [  1, _ZN8Derived1D0Ev,        250 ] (25.00%)
 // INDEXED-NEXT:     VTable Results:
-// INDEXED-NEXT:         [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
-// INDEXED-NEXT:         [  0, _ZTV8Derived1,        250 ] (25.00%)
+// INDEXED-NEXT:         [  0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        150 ] (75.00%)
+// INDEXED-NEXT:         [  0, _ZTV8Derived1,        50 ] (25.00%)
 // INDEXED-NEXT:         [  1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E,        750 ] (75.00%)
 // INDEXED-NEXT:         [  1, _ZTV8Derived1,        250 ] (25.00%)
 // COMMON: Instrumentation level: IR  entry_first = 0
 // COMMON-NEXT: Functions shown: 1
-// COMMON-NEXT: Total functions: 6
+// COMMON-NEXT: Total functions: 7
 // COMMON-NEXT: Maximum function count: 1000
-// COMMON-NEXT: Maximum internal block count: 250
+// COMMON-NEXT: Maximum internal block count: 1000
 // COMMON-NEXT: Statistics for indirect call sites profile:
 // COMMON-NEXT:   Total number of sites: 2
 // COMMON-NEXT:   Total number of sites with values: 2
@@ -76,11 +78,13 @@
 // ICTEXT: :ir
 // ICTEXT: main
 // ICTEXT: # Func Hash:
-// ICTEXT: 1124236338992350536
+// ICTEXT: 470088714870327456
 // ICTEXT: # Num Counters:
-// ICTEXT: 2
+// ICTEXT: 4
 // ICTEXT: # Counter Values:
 // ICTEXT: 1000
+// ICTEXT: 1000
+// ICTEXT: 200
 // ICTEXT: 1
 // ICTEXT: # Num Value Kinds:
 // ICTEXT: 2
@@ -89,41 +93,50 @@
 // ICTEXT: # NumValueSites:
 // ICTEXT: 2
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750
-// ICTEXT: _ZN8Derived15func1Eii:250
+// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived24funcEii:150
+// ICTEXT: _ZN8Derived14funcEii:50
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750
-// ICTEXT: _ZN8Derived15func2Eii:250
+// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived2D0Ev:750
+// ICTEXT: _ZN8Derived1D0Ev:250
 // ICTEXT: # ValueKind = IPVK_VTableTarget:
 // ICTEXT: 2
 // ICTEXT: # NumValueSites:
 // ICTEXT: 2
 // ICTEXT: 2
-// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
-// ICTEXT: _ZTV8Derived1:250
+// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:150
+// ICTEXT: _ZTV8Derived1:50
 // ICTEXT: 2
 // ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750
 // ICTEXT: _ZTV8Derived1:250
 
+// Test indirect call promotion transformation using vtable profiles.
+// RUN: %clangxx -fprofile-use=test.profdata -fuse-ld=lld -flto=thin -fwhole-program-vtables -O2 -mllvm -enable-vtable-value-profiling -mllvm -icp-enable-vtable-cmp -Rpass=pgo-icall-prom %s 2>&1 | FileCheck %s --check-prefix=REMARK --implicit-check-not="!VP"
+
+// REMARK: Promote indirect call to _ZN12_GLOBAL__N_18Derived24funcEii with count 150 out of 200, compare 1 vtables and sink 1 instructions
+// REMARK: Promote indirect call to _ZN8Derived14funcEii with count 50 out of 50, compare 1 vtables and sink 1 instructions
+// REMARK: Promote indirect call to _ZN12_GLOBAL__N_18Derived2D0Ev with count 750 out of 1000, compare 1 vtables and sink 2 instructions
+// REMARK: Promote indirect call to _ZN8Derived1D0Ev with count 250 out of 250, compare 1 vtables and sink 2 instructions
+
 #include <cstdio>
 #include <cstdlib>
 class Base {
 public:
-  virtual int func1(int a, int b) = 0;
-  virtual int func2(int a, int b) = 0;
+  virtual int func(int a, int b) = 0;
+
+  virtual ~Base() {};
 };
 class Derived1 : public Base {
 public:
-  int func1(int a, int b) override { return a + b; }
+  int func(int a, int b) override { return a * b; }
 
-  int func2(int a, int b) override { return a * b; }
+  ~Derived1() {}
 };
 namespace {
 class Derived2 : public Base {
 public:
-  int func1(int a, int b) override { return a - b; }
+  int func(int a, int b) override { return a * (a - b); }
 
-  int func2(int a, int b) override { return a * (a - b); }
+  ~Derived2() {}
 };
 } // namespace
 __attribute__((noinline)) Base *createType(int a) {
@@ -140,7 +153,10 @@ int main(int argc, char **argv) {
     int a = rand();
     int b = rand();
     Base *ptr = createType(i);
-    sum += ptr->func1(a, b) + ptr->func2(b, a);
+    if (i % 5 == 0)
+      sum += ptr->func(b, a);
+
+    delete ptr;
   }
   printf("sum is %d\n", sum);
   return 0;
diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
index 8a05e913a9106..eda672d7d50ee 100644
--- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
+++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
@@ -57,7 +57,7 @@ class ICallPromotionAnalysis {
   ///
   /// The returned array space is owned by this class, and overwritten on
   /// subsequent calls.
-  ArrayRef<InstrProfValueData>
+  MutableArrayRef<InstrProfValueData>
   getPromotionCandidatesForInstruction(const Instruction *I, uint32_t &NumVals,
                                        uint64_t &TotalCount,
                                        uint32_t &NumCandidates);
diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
index 66c972572b06c..f070e83c41689 100644
--- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h
+++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h
@@ -37,6 +37,9 @@ struct PGOIndirectCallVisitor : public InstVisitor<PGOIndirectCallVisitor> {
   // A heuristic is used to find the address feeding instructions.
   static Instruction *tryGetVTableInstruction(CallBase *CB) {
     assert(CB != nullptr && "Caller guaranteed");
+    if (!CB->isIndirectCall())
+      return nullptr;
+
     LoadInst *LI = dyn_cast<LoadInst>(CB->getCalledOperand());
 
     if (LI != nullptr) {
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 6937ec8dfd21c..5535a722a40fe 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -316,6 +316,15 @@ void salvageDebugInfoForDbgValues(Instruction &I,
                                   ArrayRef<DbgVariableIntrinsic *> Insns,
                                   ArrayRef<DbgVariableRecord *> DPInsns);
 
+void tryToSinkInstructionDbgValues(
+    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
+    BasicBlock *DestBlock, SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers);
+
+void tryToSinkInstructionDPValues(
+    Instruction *I, BasicBlock::iterator InsertPos, BasicBlock *SrcBlock,
+    BasicBlock *DestBlock,
+    SmallVectorImpl<DbgVariableRecord *> &DbgVariableRecords);
+
 /// Given an instruction \p I and DIExpression \p DIExpr operating on
 /// it, append the effects of \p I to the DIExpression operand list
 /// \p Ops, or return \p nullptr if it cannot be salvaged.
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index ab53717eb889a..643c155ba6d7e 100644
--- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -87,7 +87,7 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(
   return I;
 }
 
-ArrayRef<InstrProfValueData>
+MutableArrayRef<InstrProfValueData>
 ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
     const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount,
     uint32_t &NumCandidates) {
@@ -96,8 +96,8 @@ ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
                                ValueDataArray.get(), NumVals, TotalCount);
   if (!Res) {
     NumCandidates = 0;
-    return ArrayRef<InstrProfValueData>();
+    return MutableArrayRef<InstrProfValueData>();
   }
   NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount);
-  return ArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
+  return MutableArrayRef<InstrProfValueData>(ValueDataArray.get(), NumVals);
 }
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 23a7c6a20aecb..4de0aaef8d7ca 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,13 +13,17 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
 #include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
@@ -37,6 +41,7 @@
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Utils/CallPromotionUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <cassert>
 #include <cstdint>
 #include <memory>
@@ -51,6 +56,8 @@ using namespace llvm;
 STATISTIC(NumOfPGOICallPromotion, "Number of indirect call promotions.");
 STATISTIC(NumOfPGOICallsites, "Number of indirect call candidate sites.");
 
+extern cl::opt<unsigned> MaxNumVTableAnnotations;
+
 // Command line option to disable indirect-call promotion with the default as
 // false. This is for debug purpose.
 static cl::opt<bool> DisableICP("disable-icp", cl::init(false), cl::Hidden,
@@ -103,13 +110,202 @@ static cl::opt<bool>
     ICPDUMPAFTER("icp-dumpafter", cl::init(false), cl::Hidden,
                  cl::desc("Dump IR after transformation happens"));
 
+// This option is meant to be used by LLVM regression test and test the
+// transformation that compares vtables.
+static cl::opt<bool> ICPEnableVTableCmp(
+    "icp-enable-vtable-cmp", cl::init(false), cl::Hidden,
+    cl::desc("If ThinLTO and WPD is enabled and this option is true, "
+             "indirect-call promotion pass will compare vtables rather than "
+             "functions for speculative devirtualization of virtual calls."
+             " If set to false, indirect-call promotion pass will always "
+             "compare functions."));
+
+static cl::opt<float>
+    ICPVTableCountPercentage("icp-vtable-count-percentage", cl::init(0.99),
+                             cl::Hidden,
+                             cl::desc("Percentage of vtable count to compare"));
+
+static cl::opt<int> ICPNumAdditionalVTableLast(
+    "icp-num-additional-vtable-last", cl::init(0), cl::Hidden,
+    cl::desc("The number of additional instruction for the last candidate"));
+
 namespace {
 
+using VTableAddressPointOffsetValMap =
+    SmallDenseMap<const GlobalVariable *, SmallDenseMap<int, Constant *, 4>, 8>;
+
+// A struct to collect type information for a virtual call site.
+struct VirtualCallSiteInfo {
+  // The offset from the address point to virtual function in the vtable.
+  uint64_t FunctionOffset;
+  // The instruction that computes the address point of vtable.
+  Instruction *VPtr;
+  // The compatible type used in LLVM type intrinsics.
+  StringRef CompatibleTypeStr;
+};
+
+// The key is a virtual call, and value is its type information.
+using VirtualCallSiteTypeInfoMap =
+    SmallDenseMap<const CallBase *, VirtualCallSiteInfo, 8>;
+
+// Find the offset where type string is `CompatibleType`.
+static std::optional<uint64_t>
+getCompatibleTypeOffset(const GlobalVariable &VTableVar,
+                        StringRef CompatibleType) {
+  SmallVector<MDNode *, 2> Types; // type metadata associated with a vtable.
+  VTableVar.getMetadata(LLVMContext::MD_type, Types);
+
+  for (MDNode *Type : Types)
+    if (auto *TypeId = dyn_cast<MDString>(Type->getOperand(1).get());
+        TypeId && TypeId->getString() == CompatibleType)
+
+      return cast<ConstantInt>(
+                 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+          ->getZExtValue();
+
+  return std::nullopt;
+}
+
+// Returns a constant representing the vtable's address point specified by the
+// offset.
+static Constant *getVTableAddressPointOffset(GlobalVariable *VTable,
+                                             uint32_t AddressPointOffset) {
+  Module &M = *VTable->getParent();
+  LLVMContext &Context = M.getContext();
+  assert(AddressPointOffset <
+             M.getDataLayout().getTypeAllocSize(VTable->getValueType()) &&
+         "Out-of-bound access");
+
+  return ConstantExpr::getInBoundsGetElementPtr(
+      Type::getInt8Ty(Context), VTable,
+      llvm::ConstantInt::get(Type::getInt32Ty(Context), AddressPointOffset));
+}
+
+// Returns the basic block in which `Inst` is used by `UserInst`.
+static BasicBlock *getUserBasicBlock(Instruction *Inst, unsigned int OperandNo,
+                                     Instruction *UserInst) {
+  if (PHINode *PN = dyn_cast<PHINode>(UserInst))
+    return PN->getIncomingBlock(
+        PHINode::getIncomingValueNumForOperand(OperandNo));
+
+  return UserInst->getParent();
+}
+
+// `DestBB` is a suitable basic block to sink `Inst` into when the following
+// conditions are true:
+// 1) `Inst->getParent()` is the sole predecessor of `DestBB`. This way `DestBB`
+//    is dominated by `Inst->getParent()` and we don't need to sink across a
+//    critical edge.
+// 2) `Inst` has users and all users are in `DestBB`.
+static bool isDestBBSuitableForSink(Instruction *Inst, BasicBlock *DestBB) {
+  BasicBlock *BB = Inst->getParent();
+  assert(Inst->getParent() != DestBB &&
+         BB->getTerminator()->getNumSuccessors() == 2 &&
+         "Caller should guarantee");
+  // Do not sink across a critical edge for simplicity.
+  if (DestBB->getUniquePredecessor() != BB)
+    return false;
+
+  // Now we know BB dominates DestBB.
+  BasicBlock *UserBB = nullptr;
+  for (Use &Use : Inst->uses()) {
+    User *User = Use.getUser();
+    // Do checked cast since IR verifier guarantees that the user of an
+    // instruction must be an instruction. See `Verifier::visitInstruction`.
+    Instruction *UserInst = cast<Instruction>(User);
+    // We can sink debug or pseudo instructions together with Inst.
+    if (UserInst->isDebugOrPseudoInst())
+      continue;
+    UserBB = getUserBasicBlock(Inst, Use.getOperandNo(), UserInst);
+    // Do not...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/81442


More information about the llvm-commits mailing list