[llvm] [TypeProf][IndirectCallProm]Tweak max number of vtables in cost-benefit analysis (PR #105620)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 22 10:12:36 PDT 2024
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/105620
>From c9e880bc0f065d55e5d5f86e814fa29b252c8a2a Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Fri, 16 Aug 2024 15:00:11 -0700
Subject: [PATCH 1/4] resolve comments and add unit tests
---
.../llvm/Analysis/TargetTransformInfo.h | 10 +++
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 +
llvm/include/llvm/CodeGen/BasicTTIImpl.h | 4 ++
llvm/include/llvm/CodeGen/TargetLowering.h | 6 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +-
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 4 +-
.../Target/AArch64/AArch64ISelLowering.cpp | 3 +-
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 ++--
llvm/lib/Target/AMDGPU/SIISelLowering.h | 2 +-
llvm/lib/Target/ARM/ARMISelLowering.cpp | 3 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 2 +-
llvm/unittests/Target/AArch64/CMakeLists.txt | 1 +
.../Target/AArch64/TargetTransformInfo.cpp | 60 +++++++++++++++++
llvm/unittests/Target/X86/CMakeLists.txt | 1 +
.../Target/X86/TargetTransformInfo.cpp | 64 +++++++++++++++++++
18 files changed, 167 insertions(+), 21 deletions(-)
create mode 100644 llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
create mode 100644 llvm/unittests/Target/X86/TargetTransformInfo.cpp
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da213974..4956fc2c4303b0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -665,6 +665,12 @@ class TargetTransformInfo {
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
+ /// Return true if folding a constant offset with the given GlobalValue
+ /// (representing a GlobalAddress) is legal. It is frequently not legal
+ /// in PIC relocation models.
+ /// Caller must guarantee that GlobalValue represents a global address.
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const;
+
/// Targets can implement their own combinations for target-specific
/// intrinsics. This function will be called from the InstCombine pass every
/// time a target-specific intrinsic is encountered.
@@ -1880,6 +1886,7 @@ class TargetTransformInfo::Concept {
APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
std::function<void(Instruction *, unsigned, APInt, APInt &)>
SimplifyAndSetOp) = 0;
+ virtual bool isOffsetFoldingLegal(const GlobalValue *GV) const = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalAddScalableImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
@@ -2348,6 +2355,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
SimplifyAndSetOp);
}
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override {
+ return Impl.isOffsetFoldingLegal(GV);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27fd..bb81f4defcd633 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -220,6 +220,8 @@ class TargetTransformInfoImplBase {
void getPeelingPreferences(Loop *, ScalarEvolution &,
TTI::PeelingPreferences &) const {}
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const { return false; }
+
bool isLegalAddImmediate(int64_t Imm) const { return false; }
bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 890c2b8ca36e11..8059191c6dd471 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -354,6 +354,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return getTLI()->getPreferredLargeGEPBaseOffset(MinOffset, MaxOffset);
}
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const {
+ return getTLI()->isOffsetFoldingLegal(GV);
+ }
+
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
Type *ScalarValTy) const {
auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 9ccdbab008aec8..72fd58c0f87536 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2811,6 +2811,10 @@ class TargetLoweringBase {
Type *Ty, unsigned AddrSpace,
Instruction *I = nullptr) const;
+ virtual bool isOffsetFoldingLegal(const GlobalValue *GV) const {
+ return false;
+ }
+
/// Returns true if the targets addressing mode can target thread local
/// storage (TLS).
virtual bool addressingModeSupportsTLS(const GlobalValue &) const {
@@ -3862,7 +3866,7 @@ class TargetLowering : public TargetLoweringBase {
/// Return true if folding a constant offset with the given GlobalAddress is
/// legal. It is frequently not legal in PIC relocation models.
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
/// On x86, return true if the operand with index OpNo is a CALL or JUMP
/// instruction, which can use either a memory constraint or an address
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa98..5e80c40c41a286 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -396,6 +396,10 @@ void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
return TTIImpl->getPeelingPreferences(L, SE, PP);
}
+bool TargetTransformInfo::isOffsetFoldingLegal(const GlobalValue *GV) const {
+ return TTIImpl->isOffsetFoldingLegal(GV);
+}
+
bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
return TTIImpl->isLegalAddImmediate(Imm);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f827eb559a01cf..9ad75fd328ceea 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1161,7 +1161,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
}
} else {
if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
- if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
+ if (GA->getOpcode() == ISD::GlobalAddress &&
+ TLI.isOffsetFoldingLegal(GA->getGlobal()))
return false;
for (SDNode *Node : N->uses()) {
@@ -4006,7 +4007,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
- if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+ if (!LegalOperations && TLI.isOffsetFoldingLegal(GA->getGlobal())) {
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c3a7df5361cd45..9a8b6ae7da4053 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -6339,7 +6339,7 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
const SDNode *N2) {
if (GA->getOpcode() != ISD::GlobalAddress)
return SDValue();
- if (!TLI->isOffsetFoldingLegal(GA))
+ if (!TLI->isOffsetFoldingLegal(GA->getGlobal()))
return SDValue();
auto *C2 = dyn_cast<ConstantSDNode>(N2);
if (!C2)
@@ -13112,7 +13112,7 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) const {
// constant integer.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N))
if (GA->getOpcode() == ISD::GlobalAddress &&
- TLI->isOffsetFoldingLegal(GA))
+ TLI->isOffsetFoldingLegal(GA->getGlobal()))
return GA;
if ((N.getOpcode() == ISD::SPLAT_VECTOR) &&
isa<ConstantSDNode>(N.getOperand(0)))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index bef70dcb71f567..df63617c2d6883 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -485,10 +485,8 @@ SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
}
-bool
-TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool TargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
const TargetMachine &TM = getTargetMachine();
- const GlobalValue *GV = GA->getGlobal();
// If the address is not even local to this DSO we will have to load it from
// a got and then add the offset.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f0c3afc4f9b5d5..7e7bd9a924d9b6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11282,8 +11282,7 @@ SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}
-bool AArch64TargetLowering::isOffsetFoldingLegal(
- const GlobalAddressSDNode *GA) const {
+bool AArch64TargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
// Offsets are folded in the DAG combine rather than here so that we can
// intelligently choose an offset based on the uses.
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 81e15185f985d5..eb02484bcbe95b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -622,7 +622,7 @@ class AArch64TargetLowering : public TargetLowering {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f8767e00949bf0..da4c5c998b5dbd 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7551,8 +7551,7 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
return DAG.getNode(ISD::BITCAST, SL, VT, Or);
}
-bool
-SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool SITargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
// OSes that use ELF REL relocations (instead of RELA) can only store a
// 32-bit addend in the instruction, so it is not safe to allow offset folding
// which can create arbitrary 64-bit addends. (This is only a problem for
@@ -7565,10 +7564,10 @@ SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
// We can fold offsets for anything that doesn't require a GOT relocation.
- return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
- GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
- !shouldEmitGOTReloc(GA->getGlobal());
+ return (GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
+ GV->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+ !shouldEmitGOTReloc(GV);
}
static SDValue
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1f198a92c0fa6a..e9137614ecdf20 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -364,7 +364,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isTypeDesirableForOp(unsigned Op, EVT VT) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
unsigned combineRepeatedFPDivisors() const override {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 75d16a42d0205a..ed8a9ffd65dc84 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20974,8 +20974,7 @@ SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
}
-bool
-ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalValue *GV) const {
// The ARM target isn't yet aware of offsets.
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index a255e9b6fc365f..758b09786f05d2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -595,7 +595,7 @@ class VectorType;
bool
isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ bool isOffsetFoldingLegal(const GlobalValue *GV) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
diff --git a/llvm/unittests/Target/AArch64/CMakeLists.txt b/llvm/unittests/Target/AArch64/CMakeLists.txt
index f53668373efee9..726fa46ba97eb0 100644
--- a/llvm/unittests/Target/AArch64/CMakeLists.txt
+++ b/llvm/unittests/Target/AArch64/CMakeLists.txt
@@ -30,4 +30,5 @@ add_llvm_target_unittest(AArch64Tests
SMEAttributesTest.cpp
AArch64SVESchedPseudoTest.cpp
Immediates.cpp
+ TargetTransformInfo.cpp
)
diff --git a/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp b/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
new file mode 100644
index 00000000000000..b87675b9230650
--- /dev/null
+++ b/llvm/unittests/Target/AArch64/TargetTransformInfo.cpp
@@ -0,0 +1,60 @@
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+ SMDiagnostic Err;
+ std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+ if (!Mod)
+ Err.print(__FILE__, errs());
+ return Mod;
+}
+
+TEST(TargetTransformInfo, isOffsetFoldingLegal) {
+ LLVMInitializeAArch64TargetInfo();
+ LLVMInitializeAArch64Target();
+ LLVMInitializeAArch64TargetMC();
+
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M = parseIR(Ctx, R"(
+ target triple = "aarch64-unknown-linux-gnu"
+
+ @Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+ @Base2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+
+ define void @Base1_bar(ptr %this) {
+ ret void
+ }
+
+ declare i32 @Base1_foo(ptr)
+ )");
+
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+ std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+ M->getTargetTriple(), "generic", "", TargetOptions(), std::nullopt,
+ std::nullopt, CodeGenOptLevel::Default));
+
+ ASSERT_FALSE(TM->isPositionIndependent());
+
+ TargetTransformInfo TTI =
+ TM->getTargetTransformInfo(*M->getFunction("Base1_bar"));
+
+ EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base1")));
+ EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base2")));
+}
+} // namespace
diff --git a/llvm/unittests/Target/X86/CMakeLists.txt b/llvm/unittests/Target/X86/CMakeLists.txt
index b011681aa3b95a..236ab8aebf4f6a 100644
--- a/llvm/unittests/Target/X86/CMakeLists.txt
+++ b/llvm/unittests/Target/X86/CMakeLists.txt
@@ -23,5 +23,6 @@ set(LLVM_LINK_COMPONENTS
add_llvm_unittest(X86Tests
MachineSizeOptsTest.cpp
+ TargetTransformInfo.cpp
TernlogTest.cpp
)
diff --git a/llvm/unittests/Target/X86/TargetTransformInfo.cpp b/llvm/unittests/Target/X86/TargetTransformInfo.cpp
new file mode 100644
index 00000000000000..f38062c8691277
--- /dev/null
+++ b/llvm/unittests/Target/X86/TargetTransformInfo.cpp
@@ -0,0 +1,64 @@
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include "gtest/gtest.h"
+#include <initializer_list>
+#include <memory>
+
+using namespace llvm;
+
+namespace {
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+ SMDiagnostic Err;
+ std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+ if (!Mod)
+ Err.print(__FILE__, errs());
+ return Mod;
+}
+
+TEST(TargetTransformInfo, isOffsetFoldingLegal) {
+ LLVMInitializeX86TargetInfo();
+ LLVMInitializeX86Target();
+ LLVMInitializeX86TargetMC();
+
+ LLVMContext Ctx;
+ std::unique_ptr<Module> M = parseIR(Ctx, R"(
+
+ target triple = "x86_64-unknown-linux-gnu"
+ @Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+ @Base2 = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }
+
+ define void @Base1_bar(ptr %this) {
+ ret void
+ }
+
+ declare i32 @Base1_foo(ptr)
+ )");
+
+ std::string Error;
+ const Target *T = TargetRegistry::lookupTarget(M->getTargetTriple(), Error);
+ std::unique_ptr<TargetMachine> TM(T->createTargetMachine(
+ M->getTargetTriple(), "generic", "", TargetOptions(), std::nullopt,
+ std::nullopt, CodeGenOptLevel::Default));
+ ASSERT_FALSE(TM->isPositionIndependent());
+
+ Function *Func = M->getFunction("Base1_bar");
+
+ TargetTransformInfo TTI = TM->getTargetTransformInfo(*Func);
+
+ // Base1 is dso_local.
+ EXPECT_TRUE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base1")));
+
+ // Base2 is not dso_local.
+ EXPECT_FALSE(TTI.isOffsetFoldingLegal(M->getNamedValue("Base2")));
+}
+} // namespace
>From 8560fe3ac859e64545272f5a16fe4dcb023b05f9 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 21 Aug 2024 21:53:53 -0700
Subject: [PATCH 2/4] update comment
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 4956fc2c4303b0..4183c4f90f40fd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -665,10 +665,11 @@ class TargetTransformInfo {
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
PeelingPreferences &PP) const;
- /// Return true if folding a constant offset with the given GlobalValue
- /// (representing a GlobalAddress) is legal. It is frequently not legal
- /// in PIC relocation models.
- /// Caller must guarantee that GlobalValue represents a global address.
+ /// Given a global address represented by a global value and a constant
+ /// offset relative to it, return true if the constant offset is foldable into
+ /// the global value when lowering the global address. The constant is usually
+ /// not foldable in PIC relocation models. Caller must guarantee that GlobalValue
+ /// represents a global address.
bool isOffsetFoldingLegal(const GlobalValue *GV) const;
/// Targets can implement their own combinations for target-specific
>From d86204bc27e0f0f405428a60f0f88d82812dbdb4 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 21 Aug 2024 23:20:24 -0700
Subject: [PATCH 3/4] [TypeProf][IndirectCallProm]Tweak max number of vtables
by considering vtable address point materialization cost
---
.../Instrumentation/IndirectCallPromotion.cpp | 35 ++++-
.../Other/new-pm-thinlto-postlink-defaults.ll | 2 +-
.../new-pm-thinlto-postlink-pgo-defaults.ll | 2 +-
...-pm-thinlto-postlink-samplepgo-defaults.ll | 2 +-
.../new-pm-thinlto-prelink-pgo-defaults.ll | 2 +-
.../icp_vtable_address_point_cost.ll | 139 ++++++++++++++++++
6 files changed, 172 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 0d1f506986379d..72497b5f71a413 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -20,6 +20,7 @@
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -303,6 +304,8 @@ class IndirectCallPromoter {
Function &F;
Module &M;
+ const TargetTransformInfo &TTI;
+
ProfileSummaryInfo *PSI = nullptr;
// Symtab that maps indirect call profile values to function names and
@@ -369,6 +372,9 @@ class IndirectCallPromoter {
ArrayRef<PromotionCandidate> Candidates,
uint64_t TotalCount);
+ bool addressPointLoweringCostComparable(
+ const VTableGUIDCountsMap &VTableGUIDCounts) const;
+
// Given an indirect callsite and the list of function candidates, compute
// the following vtable information in output parameters and return vtable
// pointer if type profiles exist.
@@ -391,12 +397,12 @@ class IndirectCallPromoter {
public:
IndirectCallPromoter(
- Function &Func, Module &M, ProfileSummaryInfo *PSI,
- InstrProfSymtab *Symtab, bool SamplePGO,
+ Function &Func, Module &M, const TargetTransformInfo &TTI,
+ ProfileSummaryInfo *PSI, InstrProfSymtab *Symtab, bool SamplePGO,
const VirtualCallSiteTypeInfoMap &VirtualCSInfo,
VTableAddressPointOffsetValMap &VTableAddressPointOffsetVal,
OptimizationRemarkEmitter &ORE)
- : F(Func), M(M), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
+ : F(Func), M(M), TTI(TTI), PSI(PSI), Symtab(Symtab), SamplePGO(SamplePGO),
VirtualCSInfo(VirtualCSInfo),
VTableAddressPointOffsetVal(VTableAddressPointOffsetVal), ORE(ORE) {}
IndirectCallPromoter(const IndirectCallPromoter &) = delete;
@@ -833,6 +839,18 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
return Changed;
}
+bool IndirectCallPromoter::addressPointLoweringCostComparable(
+ const VTableGUIDCountsMap &VTableGUIDAndCounts) const {
+ for (auto &[GUID, Count] : VTableGUIDAndCounts) {
+ GlobalVariable *VTable = Symtab->getGlobalVariable(GUID);
+ assert(VTable != nullptr &&
+ "guaranteed by IndirectCallPromoter::computeVTableInfos");
+ if (!TTI.isOffsetFoldingLegal(VTable))
+ return false;
+ }
+ return true;
+}
+
// TODO: Return false if the function addressing and vtable load instructions
// cannot sink to indirect fallback.
bool IndirectCallPromoter::isProfitableToCompareVTables(
@@ -877,8 +895,12 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
// chain for the subsequent candidates. Set its value to 1 for non-last
// candidate and allow option to override it for the last candidate.
int MaxNumVTable = 1;
- if (I == CandidateSize - 1)
- MaxNumVTable = ICPMaxNumVTableLastCandidate;
+ if (I == CandidateSize - 1) {
+ if (addressPointLoweringCostComparable(VTableGUIDAndCounts))
+ MaxNumVTable = 2;
+ if (ICPMaxNumVTableLastCandidate.getNumOccurrences())
+ MaxNumVTable = ICPMaxNumVTableLastCandidate;
+ }
if ((int)Candidate.AddressPoints.size() > MaxNumVTable) {
LLVM_DEBUG(dbgs() << " allow at most " << MaxNumVTable << " and got "
@@ -991,8 +1013,9 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
auto &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
- IndirectCallPromoter CallPromoter(F, M, PSI, &Symtab, SamplePGO,
+ IndirectCallPromoter CallPromoter(F, M, TTI, PSI, &Symtab, SamplePGO,
VirtualCSInfo,
VTableAddressPointOffsetVal, ORE);
bool FuncChanged = CallPromoter.processFunction(PSI);
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index 064362eabbf839..60921edd7cee23 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -43,12 +43,12 @@
; CHECK-POSTLINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-POSTLINK-O-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-POSTLINK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
index 19a44867e434ac..30ec5dc3b9e23d 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
@@ -28,12 +28,12 @@
; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
index e5aebc4850e6db..21364b6a66f872 100644
--- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll
@@ -37,12 +37,12 @@
; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: OpenMPOptPass
; CHECK-O-NEXT: Running pass: LowerTypeTestsPass
; CHECK-O-NEXT: Running pass: IPSCCPPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: GlobalOptPass
; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
index cb49cbd22d60c0..153856112f20e7 100644
--- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll
@@ -84,6 +84,7 @@
; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on foo
+; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: AlwaysInlinerPass
; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
@@ -106,7 +107,6 @@
; CHECK-O-NEXT: Running pass: SROAPass
; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis
; CHECK-O-NEXT: Running analysis: AssumptionAnalysis
-; CHECK-O-NEXT: Running analysis: TargetIRAnalysis
; CHECK-O-NEXT: Running pass: EarlyCSEPass
; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis
; CHECK-O-NEXT: Running analysis: AAManager
diff --git a/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll b/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
new file mode 100644
index 00000000000000..95a95d4ac7c669
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/icp_vtable_address_point_cost.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,VTABLE-CMP
+; RUN: opt < %s -passes='pgo-icall-prom' -pass-remarks=pgo-icall-prom -enable-vtable-profile-use -icp-max-num-vtable-last-candidate=1 -S 2>&1 | FileCheck %s --check-prefixes=VTABLE-COMMON,FUNC-CMP
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at Base1 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0
+ at Base2 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo] }, !type !2
+ at Base3 = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo] }, !type !6
+
+ at Derived1 = constant { [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived1_bar] }, !type !1, !type !2, !type !3
+ at Derived2 = constant { [3 x ptr], [3 x ptr], [4 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @Base3_foo], [3 x ptr] [ptr null, ptr null, ptr @Base2_foo], [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Derived2_bar] }, !type !4, !type !5, !type !6, !type !7
+ at Derived3 = dso_local constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr null, ptr @Base1_foo, ptr @Base1_bar] }, !type !0, !type !8
+
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived1_bar with count 600 out of 1600, sink 2 instruction(s) and compare 1 vtable(s): {Derived1}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Derived2_bar with count 500 out of 1000, sink 2 instruction(s) and compare 1 vtable(s): {Derived2}
+; VTABLE-CMP: remark: <unknown>:0:0: Promote indirect call to Base1_bar with count 400 out of 500, sink 2 instruction(s) and compare 2 vtable(s): {Derived3, Base1}
+
+define void @test(ptr %d) {
+; VTABLE-CMP-LABEL: define void @test(
+; VTABLE-CMP-SAME: ptr [[D:%.*]]) {
+; VTABLE-CMP-NEXT: [[ENTRY:.*:]]
+; VTABLE-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; VTABLE-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; VTABLE-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; VTABLE-CMP-NEXT: [[TMP1:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived1, i32 40)
+; VTABLE-CMP-NEXT: br i1 [[TMP1]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG]]:
+; VTABLE-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
+; VTABLE-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived2, i32 64)
+; VTABLE-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG1]]:
+; VTABLE-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
+; VTABLE-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Base1, i32 16)
+; VTABLE-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[VTABLE]], getelementptr inbounds (i8, ptr @Derived3, i32 16)
+; VTABLE-CMP-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
+; VTABLE-CMP-NEXT: br i1 [[TMP5]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; VTABLE-CMP: [[IF_TRUE_DIRECT_TARG4]]:
+; VTABLE-CMP-NEXT: call void @Base1_bar(ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
+; VTABLE-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
+; VTABLE-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; VTABLE-CMP-NEXT: [[TMP6:%.*]] = load ptr, ptr [[VFN]], align 8
+; VTABLE-CMP-NEXT: call void [[TMP6]](ptr [[D]])
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP6]]
+; VTABLE-CMP: [[IF_END_ICP6]]:
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP3]]
+; VTABLE-CMP: [[IF_END_ICP3]]:
+; VTABLE-CMP-NEXT: br label %[[IF_END_ICP]]
+; VTABLE-CMP: [[IF_END_ICP]]:
+; VTABLE-CMP-NEXT: ret void
+;
+; FUNC-CMP-LABEL: define void @test(
+; FUNC-CMP-SAME: ptr [[D:%.*]]) {
+; FUNC-CMP-NEXT: [[ENTRY:.*:]]
+; FUNC-CMP-NEXT: [[VTABLE:%.*]] = load ptr, ptr [[D]], align 8, !prof [[PROF9:![0-9]+]]
+; FUNC-CMP-NEXT: [[TMP0:%.*]] = tail call i1 @llvm.type.test(ptr [[VTABLE]], metadata !"Base1")
+; FUNC-CMP-NEXT: tail call void @llvm.assume(i1 [[TMP0]])
+; FUNC-CMP-NEXT: [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 1
+; FUNC-CMP-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+; FUNC-CMP-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP1]], @Derived1_bar
+; FUNC-CMP-NEXT: br i1 [[TMP2]], label %[[IF_TRUE_DIRECT_TARG:.*]], label %[[IF_FALSE_ORIG_INDIRECT:.*]], !prof [[PROF10:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG]]:
+; FUNC-CMP-NEXT: call void @Derived1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT]]:
+; FUNC-CMP-NEXT: [[TMP3:%.*]] = icmp eq ptr [[TMP1]], @Derived2_bar
+; FUNC-CMP-NEXT: br i1 [[TMP3]], label %[[IF_TRUE_DIRECT_TARG1:.*]], label %[[IF_FALSE_ORIG_INDIRECT2:.*]], !prof [[PROF11:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG1]]:
+; FUNC-CMP-NEXT: call void @Derived2_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP3:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT2]]:
+; FUNC-CMP-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP1]], @Base1_bar
+; FUNC-CMP-NEXT: br i1 [[TMP4]], label %[[IF_TRUE_DIRECT_TARG4:.*]], label %[[IF_FALSE_ORIG_INDIRECT5:.*]], !prof [[PROF12:![0-9]+]]
+; FUNC-CMP: [[IF_TRUE_DIRECT_TARG4]]:
+; FUNC-CMP-NEXT: call void @Base1_bar(ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP6:.*]]
+; FUNC-CMP: [[IF_FALSE_ORIG_INDIRECT5]]:
+; FUNC-CMP-NEXT: call void [[TMP1]](ptr [[D]])
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP6]]
+; FUNC-CMP: [[IF_END_ICP6]]:
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP3]]
+; FUNC-CMP: [[IF_END_ICP3]]:
+; FUNC-CMP-NEXT: br label %[[IF_END_ICP]]
+; FUNC-CMP: [[IF_END_ICP]]:
+; FUNC-CMP-NEXT: ret void
+;
+entry:
+ %vtable = load ptr, ptr %d, !prof !9
+ %0 = tail call i1 @llvm.type.test(ptr %vtable, metadata !"Base1")
+ tail call void @llvm.assume(i1 %0)
+ %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1
+ %1 = load ptr, ptr %vfn
+ call void %1(ptr %d), !prof !10
+ ret void
+}
+
+define void @Base1_bar(ptr %this) {
+ ret void
+}
+
+define void @Derived1_bar(ptr %this) {
+ ret void
+}
+
+define void @Derived2_bar(ptr %this) {
+ ret void
+}
+
+
+declare i1 @llvm.type.test(ptr, metadata)
+declare void @llvm.assume(i1)
+declare i32 @Base2_foo(ptr)
+declare i32 @Base1_foo(ptr)
+declare void @Base3_foo(ptr)
+
+!0 = !{i64 16, !"Base1"}
+!1 = !{i64 40, !"Base1"}
+!2 = !{i64 16, !"Base2"}
+!3 = !{i64 16, !"Derived1"}
+!4 = !{i64 64, !"Base1"}
+!5 = !{i64 40, !"Base2"}
+!6 = !{i64 16, !"Base3"}
+!7 = !{i64 16, !"Derived2"}
+!8 = !{i64 16, !"Derived3"}
+!9 = !{!"VP", i32 2, i64 1600, i64 -4123858694673519054, i64 600, i64 -7211198353767973908, i64 500, i64 -3574436251470806727, i64 200, i64 6288809125658696740, i64 200, i64 12345678, i64 100}
+!10 = !{!"VP", i32 0, i64 1600, i64 3827408714133779784, i64 600, i64 5837445539218476403, i64 500, i64 -9064955852395570538, i64 400, i64 56781234, i64 100}
+;.
+; VTABLE-COMMON: [[PROF9]] = !{!"VP", i32 2, i64 100, i64 12345678, i64 100}
+; VTABLE-COMMON: [[PROF10]] = !{!"branch_weights", i32 600, i32 1000}
+; VTABLE-COMMON: [[PROF11]] = !{!"branch_weights", i32 500, i32 500}
+; VTABLE-COMMON: [[PROF12]] = !{!"branch_weights", i32 400, i32 100}
+
>From 946735a3cb7a71f364217c56aa089746f9db6c0d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 22 Aug 2024 10:12:05 -0700
Subject: [PATCH 4/4] update comment and function name
---
.../Instrumentation/IndirectCallPromotion.cpp | 22 ++++++++++++-------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 72497b5f71a413..13f47215a62fe5 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -372,7 +372,9 @@ class IndirectCallPromoter {
ArrayRef<PromotionCandidate> Candidates,
uint64_t TotalCount);
- bool addressPointLoweringCostComparable(
+ // Returns true if the vtable address point offset is foldable for each
+ // vtable represented by GUIDs (i.e., VTableGUIDCounts keys).
+ bool isAddressPointOffsetFoldable(
const VTableGUIDCountsMap &VTableGUIDCounts) const;
// Given an indirect callsite and the list of function candidates, compute
@@ -839,7 +841,7 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
return Changed;
}
-bool IndirectCallPromoter::addressPointLoweringCostComparable(
+bool IndirectCallPromoter::isAddressPointOffsetFoldable(
const VTableGUIDCountsMap &VTableGUIDAndCounts) const {
for (auto &[GUID, Count] : VTableGUIDAndCounts) {
GlobalVariable *VTable = Symtab->getGlobalVariable(GUID);
@@ -889,15 +891,19 @@ bool IndirectCallPromoter::isProfitableToCompareVTables(
RemainingVTableCount -= Candidate.Count;
// 'MaxNumVTable' limits the number of vtables to make vtable comparison
- // profitable. Comparing multiple vtables for one function candidate will
- // insert additional instructions on the hot path, and allowing more than
- // one vtable for non last candidates may or may not elongate the dependency
- // chain for the subsequent candidates. Set its value to 1 for non-last
- // candidate and allow option to override it for the last candidate.
+ // profitable. Set it to 1 to control icache pressure and conditionally
+ // allow an additional vtable for the last function candidate.
int MaxNumVTable = 1;
if (I == CandidateSize - 1) {
- if (addressPointLoweringCostComparable(VTableGUIDAndCounts))
+ // Comparing an additional vtable inserts `icmp vptr, @vtable +
+ // address-point-offset` IR instruction.
+ // `@vtable + address-point-offset` will lower to a standalone instruction
+ // if the constant offset is not foldable, and fold into the cmp
+ // instruction otherwise. Allow an additional vtable by default if address
+ // point offset is foldable.
+ if (isAddressPointOffsetFoldable(VTableGUIDAndCounts))
MaxNumVTable = 2;
+ // Allow command line override.
if (ICPMaxNumVTableLastCandidate.getNumOccurrences())
MaxNumVTable = ICPMaxNumVTableLastCandidate;
}
More information about the llvm-commits
mailing list