[llvm] [TypeProf][IndirectCallProm] Tweak max number of vtables in cost-benefit analysis (PR #105620)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 22 10:12:36 PDT 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/105620

>From c9e880bc0f065d55e5d5f86e814fa29b252c8a2a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 16 Aug 2024 15:00:11 -0700
Subject: [PATCH 1/4] resolve comments and add unit tests

---
 .../llvm/Analysis/TargetTransformInfo.h       | 10 +++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 +
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |  4 ++
 llvm/include/llvm/CodeGen/TargetLowering.h    |  6 +-
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  4 ++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |  4 +-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |  4 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  3 +-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 11 ++--
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  3 +-
 llvm/lib/Target/ARM/ARMISelLowering.h         |  2 +-
 llvm/unittests/Target/AArch64/CMakeLists.txt  |  1 +
 .../Target/AArch64/TargetTransformInfo.cpp    | 60 +++++++++++++++++
 llvm/unittests/Target/X86/CMakeLists.txt      |  1 +
 .../Target/X86/TargetTransformInfo.cpp        | 64 +++++++++++++++++++
 18 files changed, 167 insertions(+), 21 deletions(-)
 create mode 100644 llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
 create mode 100644 llvm/unittests/Target/X86/TargetTransformInfo.cpp

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da213974..4956fc2c4303b0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -665,6 +665,12 @@ class TargetTransformInfo {
   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                              PeelingPreferences &PP) const;
 
+  /// Return true if folding a constant offset with the given GlobalValue
+  /// (representing a GlobalAddress) is legal.  It is frequently not legal
+  /// in PIC relocation models.
+  /// Caller must guarantee that GlobalValue represents a global address.
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const;
+
   /// Targets can implement their own combinations for target-specific
   /// intrinsics. This function will be called from the InstCombine pass every
   /// time a target-specific intrinsic is encountered.
@@ -1880,6 +1886,7 @@ class TargetTransformInfo::Concept {
       APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
       std::function<void(Instruction *, unsigned, APInt, APInt &)>
           SimplifyAndSetOp) = 0;
+  virtual bool isOffsetFoldingLegal(const GlobalValue *GV) const = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
   virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
@@ -2348,6 +2355,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
         IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
         SimplifyAndSetOp);
   }
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const override {
+    return Impl.isOffsetFoldingLegal(GV);
+  }
   bool isLegalAddImmediate(int64_t Imm) override {
     return Impl.isLegalAddImmediate(Imm);
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27fd..bb81f4defcd633 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -220,6 +220,8 @@ class TargetTransformInfoImplBase {
   void getPeelingPreferences(Loop *, ScalarEvolution &,
                              TTI::PeelingPreferences &) const {}
 
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const { return false; }
+
   bool isLegalAddImmediate(int64_t Imm) const { return false; }
 
   bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 890c2b8ca36e11..8059191c6dd471 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -354,6 +354,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
   }
 
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const {
+    return getTLI()->isOffsetFoldingLegal(GV);
+  }
+
   unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                              Type *ScalarValTy) const {
     auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9ccdbab008aec8..72fd58c0f87536 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2811,6 +2811,10 @@ class TargetLoweringBase {
                                      Type *Ty, unsigned AddrSpace,
                                      Instruction *I = nullptr) const;
 
+  virtual bool isOffsetFoldingLegal(const GlobalValue *GV) const {
+    return false;
+  }
+
   /// Returns true if the targets addressing mode can target thread local
   /// storage (TLS).
   virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
@@ -3862,7 +3866,7 @@ class TargetLowering : public TargetLoweringBase {
 
   /// Return true if folding a constant offset with the given GlobalAddress is
   /// legal.  It is frequently not legal in PIC relocation models.
-  virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
 
   /// On x86, return true if the operand with index OpNo is a CALL or JUMP
   /// instruction, which can use either a memory constraint or an address
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa98..5e80c40c41a286 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -396,6 +396,10 @@ void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
   return TTIImpl->getPeelingPreferences(L, SE, PP);
 }
 
+bool TargetTransformInfo::isOffsetFoldingLegal(const GlobalValue *GV) const {
+  return TTIImpl->isOffsetFoldingLegal(GV);
+}
+
 bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
   return TTIImpl->isLegalAddImmediate(Imm);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f827eb559a01cf..9ad75fd328ceea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1161,7 +1161,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
     }
   } else {
     if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
-      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+      if (GA->getOpcode() == ISD::GlobalAddress &&
+          TLI.isOffsetFoldingLegal(GA->getGlobal()))
         return false;
 
     for (SDNode *Node : N->uses()) {
@@ -4006,7 +4007,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
 
   // If the relocation model supports it, consider symbol offsets.
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
-    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA->getGlobal())) {
       // fold (sub Sym+c1, Sym+c2) -> c1-c2
       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
         if (GA->getGlobal() == GB->getGlobal())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c3a7df5361cd45..9a8b6ae7da4053 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6339,7 +6339,7 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
                                        const SDNode *N2) {
   if (GA->getOpcode() != ISD::GlobalAddress)
     return SDValue();
-  if (!TLI->isOffsetFoldingLegal(GA))
+  if (!TLI->isOffsetFoldingLegal(GA->getGlobal()))
     return SDValue();
   auto *C2 = dyn_cast<ConstantSDNode>(N2);
   if (!C2)
@@ -13112,7 +13112,7 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
   // constant integer.
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
     if (GA->getOpcode() == ISD::GlobalAddress &&
-        TLI->isOffsetFoldingLegal(GA))
+        TLI->isOffsetFoldingLegal(GA->getGlobal()))
       return GA;
   if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
       isa<ConstantSDNode>(N.getOperand(0)))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bef70dcb71f567..df63617c2d6883 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -485,10 +485,8 @@ SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
   return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
 }
 
-bool
-TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool TargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
   const TargetMachine &TM = getTargetMachine();
-  const GlobalValue *GV = GA->getGlobal();
 
   // If the address is not even local to this DSO we will have to load it from
   // a got and then add the offset.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f0c3afc4f9b5d5..7e7bd9a924d9b6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11282,8 +11282,7 @@ SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
   return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
 }
 
-bool AArch64TargetLowering::isOffsetFoldingLegal(
-    const GlobalAddressSDNode *GA) const {
+bool AArch64TargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
   // Offsets are folded in the DAG combine rather than here so that we can
   // intelligently choose an offset based on the uses.
   return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 81e15185f985d5..eb02484bcbe95b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -622,7 +622,7 @@ class AArch64TargetLowering : public TargetLowering {
   FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                            const TargetLibraryInfo *libInfo) const override;
 
-  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
 
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f8767e00949bf0..da4c5c998b5dbd 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7551,8 +7551,7 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
   return DAG.getNode(ISD::BITCAST, SL, VT, Or);
 }
 
-bool
-SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool SITargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
   // OSes that use ELF REL relocations (instead of RELA) can only store a
   // 32-bit addend in the instruction, so it is not safe to allow offset folding
   // which can create arbitrary 64-bit addends. (This is only a problem for
@@ -7565,10 +7564,10 @@ SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
     return false;
 
   // We can fold offsets for anything that doesn't require a GOT relocation.
-  return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
-          GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
-          GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
-         !shouldEmitGOTReloc(GA->getGlobal());
+  return (GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+          GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+          GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+         !shouldEmitGOTReloc(GV);
 }
 
 static SDValue
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1f198a92c0fa6a..e9137614ecdf20 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -364,7 +364,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
 
   bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
 
-  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+  bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
 
   unsigned combineRepeatedFPDivisors() const override {
     // Combine multiple FDIVs with the same divisor into multiple FMULs by the
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 75d16a42d0205a..ed8a9ffd65dc84 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20974,8 +20974,7 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
 }
 
-bool
-ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
   // The ARM target isn't yet aware of offsets.
   return false;
 }
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index a255e9b6fc365f..758b09786f05d2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -595,7 +595,7 @@ class VectorType;
 
     bool
     isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
-    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+    bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
 
     /// isFPImmLegal - Returns true if the target can instruction select the
     /// specified FP immediate natively. If false, the legalizer will
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index f53668373efee9..726fa46ba97eb0 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -30,4 +30,5 @@ add_llvm_target_unittest(AArch64Tests
   SMEAttributesTest.cpp
   AArch64SVESchedPseudoTest.cpp
   Immediates.cpp
+  TargetTransformInfo.cpp
   )
diff --git a/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp b/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
new file mode 100644
index 00000000000000..b87675b9230650
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
@@ -0,0 +1,60 @@
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+  SMDiagnostic Err;
+  std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+  if (!Mod)
+    Err.print(__FILE__, errs());
+  return Mod;
+}
+
+TEST(TargetTransformInfo, isOffsetFoldingLegal) {
+  LLVMInitializeAArch64TargetInfo();
+  LLVMInitializeAArch64Target();
+  LLVMInitializeAArch64TargetMC();
+
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M = parseIR(Ctx, R"(
+    target triple = "aarch64-unknown-linux-gnu"
+
+    @Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+    @Base2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+    
+    define void @Base1_bar(ptr %this) {
+      ret void
+    }
+
+    declare i32 @Base1_foo(ptr)
+  )");
+
+  std::string Error;
+  const Target *T = TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+  std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+      M->getTargetTriple(), "generic", "", TargetOptions(), std::nullopt,
+      std::nullopt, CodeGenOptLevel::Default));
+
+  ASSERT_FALSE(TM->isPositionIndependent());
+
+  TargetTransformInfo TTI =
+      TM->getTargetTransformInfo(*M->getFunction("Base1_bar"));
+
+  EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base1")));
+  EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base2")));
+}
+} // namespace
diff --git a/llvm/unittests/Target/X86/CMakeLists.txt b/llvm/unittests/Target/X86/CMakeLists.txt
index b011681aa3b95a..236ab8aebf4f6a 100644
--- a/llvm/unittests/Target/X86/CMakeLists.txt
+++ b/llvm/unittests/Target/X86/CMakeLists.txt
@@ -23,5 +23,6 @@ set(LLVM_LINK_COMPONENTS
 
 add_llvm_unittest(X86Tests
   MachineSizeOptsTest.cpp
+  TargetTransformInfo.cpp
   TernlogTest.cpp
   )
diff --git a/llvm/unittests/Target/X86/TargetTransformInfo.cpp b/llvm/unittests/Target/X86/TargetTransformInfo.cpp
new file mode 100644
index 00000000000000..f38062c8691277
--- /dev/null
+++ b/llvm/unittests/Target/X86/TargetTransformInfo.cpp
@@ -0,0 +1,64 @@
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+  SMDiagnostic Err;
+  std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+  if (!Mod)
+    Err.print(__FILE__, errs());
+  return Mod;
+}
+
+TEST(TargetTransformInfo, isOffsetFoldingLegal) {
+  LLVMInitializeX86TargetInfo();
+  LLVMInitializeX86Target();
+  LLVMInitializeX86TargetMC();
+
+  LLVMContext Ctx;
+  std::unique_ptr<Module> M = parseIR(Ctx, R"(
+    
+    target triple = "x86_64-unknown-linux-gnu"
+    @Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+    @Base2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+    
+    define void @Base1_bar(ptr %this) {
+      ret void
+    }
+
+    declare i32 @Base1_foo(ptr)
+  )");
+
+  std::string Error;
+  const Target *T = TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+  std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+      M->getTargetTriple(), "generic", "", TargetOptions(), std::nullopt,
+      std::nullopt, CodeGenOptLevel::Default));
+  ASSERT_FALSE(TM->isPositionIndependent());
+
+  Function *Func = M->getFunction("Base1_bar");
+
+  TargetTransformInfo TTI = TM->getTargetTransformInfo(*Func);
+
+  // Base1 is dso_local.
+  EXPECT_TRUE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base1")));
+
+  // Base2 is not dso_local.
+  EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base2")));
+}
+} // namespace
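
The hunks above thread the existing TargetLoweringBase::isOffsetFoldingLegal
query through the TargetTransformInfo Concept/Model layers so that IR-level
passes can ask it without reaching into SelectionDAG. A minimal sketch of how
an IR pass might consume the new hook (the helper name and the ArrayRef
parameter below are illustrative, not part of this patch):

// Illustrative only: query the new TTI hook for a set of vtable globals.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/GlobalVariable.h"

using namespace llvm;

// Returns true only if every vtable's constant address-point offset can be
// folded into the lowered global address on the current target.
static bool allAddressPointOffsetsFoldable(
    const TargetTransformInfo &TTI, ArrayRef<const GlobalVariable *> VTables) {
  for (const GlobalVariable *VTable : VTables)
    if (!TTI.isOffsetFoldingLegal(VTable))
      return false;
  return true;
}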

>From 8560fe3ac859e64545272f5a16fe4dcb023b05f9 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 21 Aug 2024 21:53:53 -0700
Subject: [PATCH 2/4] update comment

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4956fc2c4303b0..4183c4f90f40fd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -665,10 +665,11 @@ class TargetTransformInfo {
   void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                              PeelingPreferences &PP) const;
 
-  /// Return true if folding a constant offset with the given GlobalValue
-  /// (representing a GlobalAddress) is legal.  It is frequently not legal
-  /// in PIC relocation models.
-  /// Caller must guarantee that GlobalValue represents a global address.
+  /// Given a global address represented by a global value and a constant
+  /// offset relative to it, return true if the constant offset is foldable
+  /// into the global value when lowering the global address. The constant is
+  /// usually not foldable in PIC relocation models. Caller must guarantee
+  /// that GlobalValue represents a global address.
   bool isOffsetFoldingLegal(const GlobalValue *GV) const;
 
   /// Targets can implement their own combinations for target-specific
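
The reworded comment describes when a constant offset can fold into a lowered
global address. For context, the compare that indirect-call promotion builds
is `icmp vptr, @vtable + address-point-offset`; a hedged IRBuilder sketch of
such a compare follows (the function name and the offset parameter are
hypothetical; the real emission happens inside IndirectCallPromoter):

// Illustrative only: build the vtable compare whose cost depends on whether
// the constant address-point offset folds into the lowered global address.
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"

using namespace llvm;

static Value *emitVTableCompare(IRBuilder<> &Builder, Value *VPtr,
                                GlobalVariable *VTable,
                                unsigned AddressPointOffset) {
  // @vtable + address-point-offset as a constant GEP; the backend can fold
  // the offset into the compare operand only when offset folding is legal.
  Value *AddressPoint = Builder.CreateConstInBoundsGEP1_32(
      Builder.getInt8Ty(), VTable, AddressPointOffset);
  return Builder.CreateICmpEQ(VPtr, AddressPoint);
}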

>From d86204bc27e0f0f405428a60f0f88d82812dbdb4 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 21 Aug 2024 23:20:24 -0700
Subject: [PATCH 3/4] [TypeProf][IndirectCallProm] Tweak max number of vtables
 by considering vtable address point materialization cost

---
 .../Instrumentation/IndirectCallPromotion.cpp |  35 ++++-
 .../Other/new-pm-thinlto-postlink-defaults.ll |   2 +-
 .../new-pm-thinlto-postlink-pgo-defaults.ll   |   2 +-
 ...-pm-thinlto-postlink-samplepgo-defaults.ll |   2 +-
 .../new-pm-thinlto-prelink-pgo-defaults.ll    |   2 +-
 .../icp_vtable_address_point_cost.ll          | 139 ++++++++++++++++++
 6 files changed, 172 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll

diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0d1f506986379d..72497b5f71a413 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/IndirectCallVisitor.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeMetadataUtils.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -303,6 +304,8 @@ class IndirectCallPromoter {
   Function &F;
   Module &M;
 
+  const TargetTransformInfo &TTI;
+
   ProfileSummaryInfo *PSI = nullptr;
 
   // Symtab that maps indirect call profile values to function names and
@@ -369,6 +372,9 @@ class IndirectCallPromoter {
                                     ArrayRef<PromotionCandidate> Candidates,
                                     uint64_t TotalCount);
 
+  bool addressPointLoweringCostComparable(
+      const VTableGUIDCountsMap &VTableGUIDCounts) const;
+
   // Given an indirect callsite and the list of function candidates, compute
   // the following vtable information in output parameters and return vtable
   // pointer if type profiles exist.
@@ -391,12 +397,12 @@ class IndirectCallPromoter {
 
 public:
   IndirectCallPromoter(
-      Function &Func, Module &M, ProfileSummaryInfo *PSI,
-      InstrProfSymtab *Symtab, bool SamplePGO,
+      Function &Func, Module &M, const TargetTransformInfo &TTI,
+      ProfileSummaryInfo *PSI, InstrProfSymtab *Symtab, bool SamplePGO,
       const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
       VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
       OptimizationRemarkEmitter &ORE)
-      : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
+      : F(Func), M(M), TTI(TTI), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
         VirtualCSInfo(VirtualCSInfo),
         VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
   IndirectCallPromoter(const IndirectCallPromoter &) = delete;
@@ -833,6 +839,18 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
   return Changed;
 }
 
+bool IndirectCallPromoter::addressPointLoweringCostComparable(
+    const VTableGUIDCountsMap &VTableGUIDAndCounts) const {
+  for (auto &[GUID, Count] : VTableGUIDAndCounts) {
+    GlobalVariable *VTable = Symtab->getGlobalVariable(GUID);
+    assert(VTable != nullptr &&
+           "guaranteed by IndirectCallPromoter::computeVTableInfos");
+    if (!TTI.isOffsetFoldingLegal(VTable))
+      return false;
+  }
+  return true;
+}
+
 // TODO: Return false if the function addressing and vtable load instructions
 // cannot sink to indirect fallback.
 bool IndirectCallPromoter::isProfitableToCompareVTables(
@@ -877,8 +895,12 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
     // chain for the subsequent candidates. Set its value to 1 for non-last
     // candidate and allow option to override it for the last candidate.
     int MaxNumVTable = 1;
-    if (I == CandidateSize - 1)
-      MaxNumVTable = ICPMaxNumVTableLastCandidate;
+    if (I == CandidateSize - 1) {
+      if (addressPointLoweringCostComparable(VTableGUIDAndCounts))
+        MaxNumVTable = 2;
+      if (ICPMaxNumVTableLastCandidate.getNumOccurrences())
+        MaxNumVTable = ICPMaxNumVTableLastCandidate;
+    }
 
     if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
       LLVM_DEBUG(dbgs() << "    allow at most " << MaxNumVTable << " and got "
@@ -991,8 +1013,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
     auto &FAM =
         MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
     auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
 
-    IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
+    IndirectCallPromoter CallPromoter(F, M, TTI, PSI, &Symtab, SamplePGO,
                                       VirtualCSInfo,
                                       VTableAddressPointOffsetVal, ORE);
     bool FuncChanged = CallPromoter.processFunction(PSI);
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf839..60921edd7cee23 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -43,12 +43,12 @@
 ; CHECK-POSTLINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-POSTLINK-O-NEXT: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-NEXT: Running pass: OpenMPOptPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
 ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434ac..30ec5dc3b9e23d 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -28,12 +28,12 @@
 ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo
 ; CHECK-O-NEXT: Running pass: OpenMPOptPass
 ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
 ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index e5aebc4850e6db..21364b6a66f872 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -37,12 +37,12 @@
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running pass: OpenMPOptPass
 ; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
 ; CHECK-O-NEXT: Running pass: IPSCCPPass
 ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index cb49cbd22d60c0..153856112f20e7 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -84,6 +84,7 @@
 ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
 ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
 ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
@@ -106,7 +107,6 @@
 ; CHECK-O-NEXT: Running pass: SROAPass
 ; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
 ; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-O-NEXT: Running pass: EarlyCSEPass
 ; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
 ; CHECK-O-NEXT: Running analysis: AAManager
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
new file mode 100644
index 00000000000000..95a95d4ac7c669
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0
+ at Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2
+ at Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6
+
+ at Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3
+ at Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7
+ at Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8
+
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, sink 2 instruction(s) and compare 1 vtable(s): {Derived1}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {Derived2}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Base1_bar with count 400 out of 500, sink 2 instruction(s) and compare 2 vtable(s): {Derived3, Base1}
+
+define void @test(ptr %d) {
+; VTABLE-CMP-LABEL: define void @test(
+; VTABLE-CMP-SAME: ptr [[D:%.*]]) {
+; VTABLE-CMP-NEXT:  [[ENTRY:.*:]]
+; VTABLE-CMP-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; VTABLE-CMP-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; VTABLE-CMP-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; VTABLE-CMP-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40)
+; VTABLE-CMP-NEXT:    br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; VTABLE-CMP:       [[IF_TRUE_DIRECT_TARG]]:
+; VTABLE-CMP-NEXT:    call void @Derived1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT:    br label %[[IF_END_ICP:.*]]
+; VTABLE-CMP:       [[IF_FALSE_ORIG_INDIRECT]]:
+; VTABLE-CMP-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64)
+; VTABLE-CMP-NEXT:    br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; VTABLE-CMP:       [[IF_TRUE_DIRECT_TARG1]]:
+; VTABLE-CMP-NEXT:    call void @Derived2_bar(ptr [[D]])
+; VTABLE-CMP-NEXT:    br label %[[IF_END_ICP3:.*]]
+; VTABLE-CMP:       [[IF_FALSE_ORIG_INDIRECT2]]:
+; VTABLE-CMP-NEXT:    [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16)
+; VTABLE-CMP-NEXT:    [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived3, i32 16)
+; VTABLE-CMP-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
+; VTABLE-CMP-NEXT:    br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; VTABLE-CMP:       [[IF_TRUE_DIRECT_TARG4]]:
+; VTABLE-CMP-NEXT:    call void @Base1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT:    br label %[[IF_END_ICP6:.*]]
+; VTABLE-CMP:       [[IF_FALSE_ORIG_INDIRECT5]]:
+; VTABLE-CMP-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; VTABLE-CMP-NEXT:    [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
+; VTABLE-CMP-NEXT:    call void [[TMP6]](ptr [[D]])
+; VTABLE-CMP-NEXT:    br label %[[IF_END_ICP6]]
+; VTABLE-CMP:       [[IF_END_ICP6]]:
+; VTABLE-CMP-NEXT:    br label %[[IF_END_ICP3]]
+; VTABLE-CMP:       [[IF_END_ICP3]]:
+; VTABLE-CMP-NEXT:    br label %[[IF_END_ICP]]
+; VTABLE-CMP:       [[IF_END_ICP]]:
+; VTABLE-CMP-NEXT:    ret void
+;
+; FUNC-CMP-LABEL: define void @test(
+; FUNC-CMP-SAME: ptr [[D:%.*]]) {
+; FUNC-CMP-NEXT:  [[ENTRY:.*:]]
+; FUNC-CMP-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; FUNC-CMP-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; FUNC-CMP-NEXT:    tail call void @llvm.assume(i1 [[TMP0]])
+; FUNC-CMP-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; FUNC-CMP-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; FUNC-CMP-NEXT:    [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar
+; FUNC-CMP-NEXT:    br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; FUNC-CMP:       [[IF_TRUE_DIRECT_TARG]]:
+; FUNC-CMP-NEXT:    call void @Derived1_bar(ptr [[D]])
+; FUNC-CMP-NEXT:    br label %[[IF_END_ICP:.*]]
+; FUNC-CMP:       [[IF_FALSE_ORIG_INDIRECT]]:
+; FUNC-CMP-NEXT:    [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar
+; FUNC-CMP-NEXT:    br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; FUNC-CMP:       [[IF_TRUE_DIRECT_TARG1]]:
+; FUNC-CMP-NEXT:    call void @Derived2_bar(ptr [[D]])
+; FUNC-CMP-NEXT:    br label %[[IF_END_ICP3:.*]]
+; FUNC-CMP:       [[IF_FALSE_ORIG_INDIRECT2]]:
+; FUNC-CMP-NEXT:    [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar
+; FUNC-CMP-NEXT:    br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; FUNC-CMP:       [[IF_TRUE_DIRECT_TARG4]]:
+; FUNC-CMP-NEXT:    call void @Base1_bar(ptr [[D]])
+; FUNC-CMP-NEXT:    br label %[[IF_END_ICP6:.*]]
+; FUNC-CMP:       [[IF_FALSE_ORIG_INDIRECT5]]:
+; FUNC-CMP-NEXT:    call void [[TMP1]](ptr [[D]])
+; FUNC-CMP-NEXT:    br label %[[IF_END_ICP6]]
+; FUNC-CMP:       [[IF_END_ICP6]]:
+; FUNC-CMP-NEXT:    br label %[[IF_END_ICP3]]
+; FUNC-CMP:       [[IF_END_ICP3]]:
+; FUNC-CMP-NEXT:    br label %[[IF_END_ICP]]
+; FUNC-CMP:       [[IF_END_ICP]]:
+; FUNC-CMP-NEXT:    ret void
+;
+entry:
+  %vtable = load ptr, ptr %d, !prof !9
+  %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"Base1")
+  tail call void @llvm.assume(i1 %0)
+  %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+  %1 = load ptr, ptr %vfn
+  call void %1(ptr %d), !prof !10
+  ret void
+}
+
+define void @Base1_bar(ptr %this) {
+  ret void
+}
+
+define void @Derived1_bar(ptr %this) {
+  ret void
+}
+
+define void @Derived2_bar(ptr %this) {
+  ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @Base2_foo(ptr)
+declare i32 @Base1_foo(ptr)
+declare void @Base3_foo(ptr)
+
+!0 = !{i64 16, !"Base1"}
+!1 = !{i64 40, !"Base1"}
+!2 = !{i64 16, !"Base2"}
+!3 = !{i64 16, !"Derived1"}
+!4 = !{i64 64, !"Base1"}
+!5 = !{i64 40, !"Base2"}
+!6 = !{i64 16, !"Base3"}
+!7 = !{i64 16, !"Derived2"}
+!8 = !{i64 16, !"Derived3"}
+!9 = !{!"VP", i32 2, i64 1600, i64 -4123858694673519054, i64 600, i64 -7211198353767973908, i64 500, i64 -3574436251470806727, i64 200, i64 6288809125658696740, i64 200, i64 12345678, i64 100}
+!10 = !{!"VP", i32 0, i64 1600, i64 3827408714133779784, i64 600, i64 5837445539218476403, i64 500, i64 -9064955852395570538, i64 400,  i64 56781234, i64 100}
+;.
+; VTABLE-COMMON: [[PROF9]] = !{!"VP", i32 2, i64 100, i64 12345678, i64 100}
+; VTABLE-COMMON: [[PROF10]] = !{!"branch_weights", i32 600, i32 1000}
+; VTABLE-COMMON: [[PROF11]] = !{!"branch_weights", i32 500, i32 500}
+; VTABLE-COMMON: [[PROF12]] = !{!"branch_weights", i32 400, i32 100}
+
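
Condensed, the change above lets the last promotion candidate compare up to
two vtables when every profiled vtable's address-point offset is foldable,
while an explicit -icp-max-num-vtable-last-candidate value still takes
precedence. A simplified sketch of that decision (the standalone helper and
its parameters are illustrative; the real logic lives in
IndirectCallPromoter::isProfitableToCompareVTables):

// Illustrative sketch of the vtable budget for the last candidate.
#include <optional>

static int maxVTablesForLastCandidate(bool AllOffsetsFoldable,
                                      std::optional<int> CommandLineOverride) {
  int MaxNumVTable = 1;
  // A second vtable comparison only adds one icmp when the constant
  // address-point offset folds into the `@vtable + offset` operand.
  if (AllOffsetsFoldable)
    MaxNumVTable = 2;
  // -icp-max-num-vtable-last-candidate, when given, overrides the default.
  if (CommandLineOverride)
    MaxNumVTable = *CommandLineOverride;
  return MaxNumVTable;
}

The new test icp_vtable_address_point_cost.ll exercises both paths: the
default RUN line expects the last candidate to compare two vtables
({Derived3, Base1}), while the -icp-max-num-vtable-last-candidate=1 RUN line
falls back to comparing the function pointer.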

>From 946735a3cb7a71f364217c56aa089746f9db6c0d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 22 Aug 2024 10:12:05 -0700
Subject: [PATCH 4/4] update comment and function name

---
 .../Instrumentation/IndirectCallPromotion.cpp | 22 ++++++++++++-------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 72497b5f71a413..13f47215a62fe5 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -372,7 +372,9 @@ class IndirectCallPromoter {
                                     ArrayRef<PromotionCandidate> Candidates,
                                     uint64_t TotalCount);
 
-  bool addressPointLoweringCostComparable(
+  // Returns true if the address-point offset is foldable into the vtable's
+  // address for each vtable represented by the GUIDs (VTableGUIDCounts keys).
+  bool isAddressPointOffsetFoldable(
       const VTableGUIDCountsMap &VTableGUIDCounts) const;
 
   // Given an indirect callsite and the list of function candidates, compute
@@ -839,7 +841,7 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
   return Changed;
 }
 
-bool IndirectCallPromoter::addressPointLoweringCostComparable(
+bool IndirectCallPromoter::isAddressPointOffsetFoldable(
     const VTableGUIDCountsMap &VTableGUIDAndCounts) const {
   for (auto &[GUID, Count] : VTableGUIDAndCounts) {
     GlobalVariable *VTable = Symtab->getGlobalVariable(GUID);
@@ -889,15 +891,19 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
     RemainingVTableCount -= Candidate.Count;
 
     // 'MaxNumVTable' limits the number of vtables to make vtable comparison
-    // profitable. Comparing multiple vtables for one function candidate will
-    // insert additional instructions on the hot path, and allowing more than
-    // one vtable for non last candidates may or may not elongate the dependency
-    // chain for the subsequent candidates. Set its value to 1 for non-last
-    // candidate and allow option to override it for the last candidate.
+    // profitable. Set it to 1 to control icache pressure and conditionally
+    // allow an additional vtable for the last function candidate.
     int MaxNumVTable = 1;
     if (I == CandidateSize - 1) {
-      if (addressPointLoweringCostComparable(VTableGUIDAndCounts))
+      // Comparing an additional vtable inserts an `icmp vptr, @vtable +
+      // address-point-offset` IR instruction.
+      // `@vtable + address-point-offset` lowers to a standalone instruction
+      // if the constant offset is not foldable, and folds into the cmp
+      // instruction otherwise. Allow an additional vtable by default if the
+      // address point offset is foldable.
+      if (isAddressPointOffsetFoldable(VTableGUIDAndCounts))
         MaxNumVTable = 2;
+      // Allow command line override.
       if (ICPMaxNumVTableLastCandidate.getNumOccurrences())
         MaxNumVTable = ICPMaxNumVTableLastCandidate;
     }


